summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- lib/coderay.rb 4
-rw-r--r-- lib/coderay/encoders/debug.rb 10
-rw-r--r-- lib/coderay/encoders/debug_lint.rb 4
-rw-r--r-- lib/coderay/rule_based_scanner.rb 378
-rw-r--r-- lib/coderay/scanners/_map.rb 5
-rw-r--r-- lib/coderay/scanners/c2.rb 110
-rw-r--r-- lib/coderay/scanners/c3.rb 112
-rw-r--r-- lib/coderay/scanners/c4.rb 126
-rw-r--r-- lib/coderay/scanners/css2.rb 90
-rw-r--r-- lib/coderay/scanners/java_script1.rb 238
-rw-r--r-- lib/coderay/scanners/java_script2.rb 240
-rw-r--r-- lib/coderay/scanners/java_script3.rb 239
-rw-r--r-- lib/coderay/scanners/java_script4.rb 400
-rw-r--r-- lib/coderay/scanners/java_script5.rb 162
-rw-r--r-- lib/coderay/scanners/json1.rb 100
-rw-r--r-- lib/coderay/scanners/json2.rb 131
-rw-r--r-- lib/coderay/scanners/json3.rb 143
-rw-r--r-- lib/coderay/scanners/json4.rb 143
-rw-r--r-- lib/coderay/scanners/json5.rb 53
-rw-r--r-- lib/coderay/scanners/lua2.rb 157
-rw-r--r-- lib/coderay/scanners/lua2b.rb 157
-rw-r--r-- lib/coderay/scanners/lua3.rb 142
-rw-r--r-- lib/coderay/scanners/lua4.rb 89
-rw-r--r-- lib/coderay/simple_scanner.rb 40
-rw-r--r-- lib/coderay/simple_scanner_dsl.rb 381
-rw-r--r-- lib/coderay/state_based_scanner.rb 394
-rw-r--r-- lib/coderay/version.rb 2
-rw-r--r-- rake_tasks/test.rake 5
-rw-r--r-- spec/simple_scanner_spec.rb 28
-rw-r--r-- spec/spec_helper.rb 96
30 files changed, 4176 insertions, 3 deletions
diff --git a/lib/coderay.rb b/lib/coderay.rb
index c3de20b..c1c9e34 100644
--- a/lib/coderay.rb
+++ b/lib/coderay.rb
@@ -153,6 +153,10 @@ module CodeRay
autoload :Encoders, coderay_path('encoders')
autoload :Styles, coderay_path('styles')
+ # DSL Scanner
+ autoload :RuleBasedScanner, coderay_path('rule_based_scanner')
+ autoload :StateBasedScanner, coderay_path('state_based_scanner')
+
# convenience access and reusable Encoder/Scanner pair
autoload :Duo, coderay_path('duo')
diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb
index f4db330..6b680fc 100644
--- a/lib/coderay/encoders/debug.rb
+++ b/lib/coderay/encoders/debug.rb
@@ -15,9 +15,12 @@ module Encoders
register_for :debug
+ attr_reader :size
+
FILE_EXTENSION = 'raydebug'
def text_token text, kind
+ @size += 1
if kind == :space
@out << text
else
@@ -43,6 +46,13 @@ module Encoders
@out << ']'
end
+ protected
+
+ # Runs the inherited setup, then starts the token counter at zero;
+ # text_token increments @size once per text token.
+ def setup options
+ super
+ @size = 0
+ end
+
end
end
diff --git a/lib/coderay/encoders/debug_lint.rb b/lib/coderay/encoders/debug_lint.rb
index a4eba2c..497d8c5 100644
--- a/lib/coderay/encoders/debug_lint.rb
+++ b/lib/coderay/encoders/debug_lint.rb
@@ -29,7 +29,7 @@ module Encoders
end
def end_group kind
- raise Lint::IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_group)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind
+ raise Lint::IncorrectTokenGroupNesting, 'We are inside %p, not %p (end_group)' % [@opened.reverse, kind] if @opened.last != kind
@opened.pop
super
end
@@ -40,7 +40,7 @@ module Encoders
end
def end_line kind
- raise Lint::IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_line)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind
+ raise Lint::IncorrectTokenGroupNesting, 'We are inside %p, not %p (end_line)' % [@opened.reverse, kind] if @opened.last != kind
@opened.pop
super
end
diff --git a/lib/coderay/rule_based_scanner.rb b/lib/coderay/rule_based_scanner.rb
new file mode 100644
index 0000000..0eb9222
--- /dev/null
+++ b/lib/coderay/rule_based_scanner.rb
@@ -0,0 +1,378 @@
+require 'set'
+
+module CodeRay
+ module Scanners
+ class RuleBasedScanner < Scanner
+
+ Pattern = Struct.new :pattern
+ Groups = Struct.new :token_kinds
+ Kind = Struct.new :token_kind
+ Push = Struct.new :state, :group
+ Pop = Struct.new :group
+ PushState = Struct.new :state
+ PopState = Class.new
+ Check = Struct.new :condition
+ CheckIf = Class.new Check
+ CheckUnless = Class.new Check
+ ValueSetter = Struct.new :targets, :value
+ Increment = Struct.new :targets, :operation, :value
+ Continue = Class.new
+
+ class << self
+ attr_accessor :states
+
+ # Opens a `when` branch for the given state names in the generated scanner
+ # code, evaluates the block to collect that state's rules, and closes the
+ # rule chain with a fallback that emits one character as an :error token.
+ def state *names, &block
+   @code ||= ""
+
+   state_list = names.map { |name| name.inspect }.join(', ')
+   @code << "when #{state_list}\n"
+
+   # The first rule of a state opens with `if`, later ones with `elsif`.
+   @first = true
+   instance_eval(&block)
+
+   fallback = [" else\n"]
+   fallback << " puts \"no match for \#{state.inspect} => skip char\"\n" if $DEBUG
+   fallback << " encoder.text_token getch, :error\n"
+   fallback << " end\n"
+   fallback << " \n"
+   fallback.each { |line| @code << line }
+
+   @code
+ end
+
+ # Wraps the rules defined inside the block in a branch that is only entered
+ # when the pattern matches at the current position (`check`, so nothing is
+ # consumed). The nested rules form their own if/elsif chain.
+ def on? pattern
+   keyword = @first ? 'if' : 'elsif'
+   @code << " #{keyword} check(#{pattern.inspect})\n"
+
+   # Nested rules start a fresh chain, closed by the `end` below.
+   @first = true
+   yield
+   @code << " end\n"
+
+   @first = false
+ end
+
+ # Adds one rule to the state currently being defined.
+ #
+ # The arguments are a run of conditions (check_if / check_unless objects,
+ # Regexp literals, or pattern objects) followed by a run of actions
+ # (everything from the first non-condition argument onwards). The rule is
+ # compiled into an `if`/`elsif` branch appended to the generated code.
+ #
+ # Raises RuntimeError if the rule has no conditions, has no actions, or
+ # contains a condition or action this compiler does not understand.
+ def on *pattern_and_actions
+   index = pattern_and_actions.find_index { |item| !(item.is_a?(Check) || item.is_a?(Regexp) || item.is_a?(Pattern)) }
+   raise "invalid rule structure: #{pattern_and_actions.map(&:class)}" unless index
+
+   # Exclusive range: with index == 0, `[0..index - 1]` would be `[0..-1]`,
+   # i.e. the whole argument list instead of an empty condition list.
+   conditions = pattern_and_actions[0...index]
+   raise 'I need conditions or a pattern!' if conditions.empty?
+   actions = pattern_and_actions[index..-1]
+
+   # Conditions are compiled in order; make_callback names depend on it.
+   condition_expressions = conditions.map do |condition|
+     case condition
+     when CheckIf
+       case condition.condition
+       when Proc
+         make_callback(condition.condition)
+       when Symbol
+         condition.condition.to_s
+       else
+         raise "I don't know how to evaluate this check_if condition: %p" % [condition.condition]
+       end
+     when CheckUnless
+       case condition.condition
+       when Proc
+         "!#{make_callback(condition.condition)}"
+       when Symbol
+         "!#{condition.condition}"
+       else
+         raise "I don't know how to evaluate this check_unless condition: %p" % [condition.condition]
+       end
+     when Pattern
+       case condition.pattern
+       when Proc
+         "match = scan(#{make_callback(condition.pattern)})"
+       else
+         raise "I don't know how to evaluate this pattern: %p" % [condition.pattern]
+       end
+     when Regexp
+       "match = scan(#{condition.inspect})"
+     else
+       raise "I don't know how to evaluate this pattern/condition: %p" % [condition]
+     end
+   end
+
+   @code << " #{'els' unless @first}if #{condition_expressions.join(' && ')}\n"
+
+   actions.each do |action|
+     case action
+     when String
+       # Raw code strings are rejected on purpose (this branch previously
+       # used a bare `raise` in front of unreachable code-emitting lines).
+       raise "I don't know how to evaluate this action: %p" % [action]
+
+     when Symbol
+       @code << " p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG
+       @code << " encoder.text_token match, #{action.inspect}\n"
+
+     when Kind
+       case action.token_kind
+       when Proc
+         @code << " encoder.text_token match, kind = #{make_callback(action.token_kind)}\n"
+       else
+         raise "I don't know how to evaluate this kind: %p" % [action.token_kind]
+       end
+
+     when Groups
+       @code << " p 'text_tokens %p in groups %p' % [match, #{action.token_kinds.inspect}]\n" if $DEBUG
+       action.token_kinds.each_with_index do |kind, i|
+         @code << " encoder.text_token self[#{i + 1}], #{kind.inspect} if self[#{i + 1}]\n"
+       end
+
+     when Push, PushState
+       case action.state
+       when Symbol
+         @code << " p 'push %p' % [#{action.state.inspect}]\n" if $DEBUG
+         @code << " state = #{action.state.inspect}\n"
+         @code << " states << state\n"
+       when Proc
+         @code << " if new_state = #{make_callback(action.state)}\n"
+         @code << " state = new_state\n"
+         @code << " states << new_state\n"
+         @code << " end\n"
+       else
+         # Includes String states, which are rejected on purpose.
+         raise "I don't know how to evaluate this push state: %p" % [action.state]
+       end
+
+       if action.is_a? Push
+         if action.state == action.group
+           @code << " encoder.begin_group state\n"
+         else
+           # Dispatch on the group, not the state: the group is what is emitted.
+           case action.group
+           when Symbol
+             @code << " p 'begin group %p' % [#{action.group.inspect}]\n" if $DEBUG
+             @code << " encoder.begin_group #{action.group.inspect}\n"
+           when Proc
+             @code << " encoder.begin_group #{make_callback(action.group)}\n"
+           else
+             raise "I don't know how to evaluate this push group: %p" % [action.group]
+           end
+         end
+       end
+
+     when Pop, PopState
+       @code << " p 'pop %p' % [states.last]\n" if $DEBUG
+       if action.is_a? Pop
+         if action.group
+           case action.group
+           when Symbol
+             @code << " encoder.end_group #{action.group.inspect}\n"
+           else
+             raise "I don't know how to evaluate this pop group: %p" % [action.group]
+           end
+           @code << " states.pop\n"
+         else
+           # No explicit group: the popped state names the group to close.
+           @code << " encoder.end_group states.pop\n"
+         end
+       else
+         @code << " states.pop\n"
+       end
+       @code << " state = states.last\n"
+
+     when ValueSetter
+       case action.value
+       when Proc
+         @code << " #{action.targets.join(' = ')} = #{make_callback(action.value)}\n"
+       when Symbol
+         # A Symbol value names another variable and is emitted verbatim.
+         @code << " #{action.targets.join(' = ')} = #{action.value}\n"
+       else
+         @code << " #{action.targets.join(' = ')} = #{action.value.inspect}\n"
+       end
+
+     when Increment
+       case action.value
+       when Proc
+         @code << " #{action.targets.join(' = ')} #{action.operation}= #{make_callback(action.value)}\n"
+       when Symbol
+         @code << " #{action.targets.join(' = ')} #{action.operation}= #{action.value}\n"
+       else
+         @code << " #{action.targets.join(' = ')} #{action.operation}= #{action.value.inspect}\n"
+       end
+
+     when Proc
+       @code << " #{make_callback(action)}\n"
+
+     when Continue
+       @code << " next\n"
+
+     else
+       raise "I don't know how to evaluate this action: %p" % [action]
+     end
+   end
+
+   @first = false
+ end
+
+ # Action: emit one token per regexp capture group, using the given token
+ # kinds in order.
+ def groups *token_kinds
+   Groups.new(token_kinds)
+ end
+
+ # Condition: wraps a pattern or a block. `on` currently only compiles
+ # Proc patterns.
+ def pattern pattern = nil, &block
+   matcher = pattern || block
+   Pattern.new(matcher)
+ end
+
+ # Action: emit the match with a token kind. `on` currently only compiles
+ # Proc kinds (the block computes the kind).
+ def kind token_kind = nil, &block
+   chooser = token_kind || block
+   Kind.new(chooser)
+ end
+
+ # Action: push a state and open a token group; the group defaults to the
+ # state. Raises if neither a state nor a block is given.
+ def push state = nil, group = state, &block
+   target = state || block
+   raise 'push requires a state or a block; got nothing' unless target
+   Push.new(target, group || block)
+ end
+
+ # Action: pop the current state and close its group (or the given group).
+ def pop group = nil
+   Pop.new(group)
+ end
+
+ # Action: push a state without opening a token group.
+ # Raises if neither a state nor a block is given.
+ def push_state state = nil, &block
+   target = state || block
+   raise 'push_state requires a state or a block; got nothing' unless target
+   PushState.new(target)
+ end
+
+ # Action: pop the current state without closing a token group.
+ def pop_state
+   PopState.new
+ end
+
+ # Condition: the rule applies only if the named variable (Symbol) or the
+ # block evaluates truthy.
+ def check_if value = nil, &callback
+   test = value || callback
+   CheckIf.new(test)
+ end
+
+ # Condition: the rule applies only if the named variable (Symbol) or the
+ # block evaluates falsy.
+ def check_unless value = nil, &callback
+   test = value || callback
+   CheckUnless.new(test)
+ end
+
+ # Action: set every named flag to true. The names are registered as
+ # scanner variables.
+ def flag_on *flags
+   variables.merge(flags)
+   ValueSetter.new(flags, true)
+ end
+
+ # Action: set every named flag to false. The names are registered as
+ # scanner variables.
+ def flag_off *flags
+   variables.merge(flags)
+   ValueSetter.new(flags, false)
+ end
+
+ # Action: assign a value to the scanner variable `flag`.
+ #
+ # The value is, in order of precedence: the explicit `value` argument
+ # (a Symbol is emitted as the name of another variable, anything else as
+ # a literal), the block (compiled into a callback), or `true`.
+ def set flag, value = nil, &callback
+   variables << flag
+   # `value || callback || true` would turn an explicit `false` into `true`,
+   # so only fall back when no value was given at all.
+   value = callback || true if value.nil?
+   ValueSetter.new [flag], value
+ end
+
+ # Action: set every named flag to nil. The names are registered as
+ # scanner variables.
+ def unset *flags
+   variables.merge(flags)
+   ValueSetter.new(flags, nil)
+ end
+
+ # Action: add 1 to every named counter.
+ def increment *counters
+   variables.merge(counters)
+   Increment.new(counters, :+, 1)
+ end
+
+ # Action: subtract 1 from every named counter.
+ def decrement *counters
+   variables.merge(counters)
+   Increment.new(counters, :-, 1)
+ end
+
+ # Action: emit `next` in the generated scan loop.
+ def continue
+   Continue.new
+ end
+
+ # Compiles the collected rules and defines the resulting scan_tokens
+ # method on this class. With the PUTS environment variable set, the
+ # generated source and the number of callbacks are printed first.
+ def define_scan_tokens!
+   source = scan_tokens_code
+
+   if ENV['PUTS']
+     puts CodeRay.scan(source, :ruby).terminal
+     puts "callbacks: #{callbacks.size}"
+   end
+
+   class_eval source
+ end
+
+ protected
+
+ # Registry of generated callback methods, keyed by method name.
+ def callbacks
+   @callbacks = {} unless @callbacks
+   @callbacks
+ end
+
+ # Names of all variables referenced by the rules of this scanner.
+ def variables
+   @variables = Set.new unless @variables
+   @variables
+ end
+
+ # Rule variables other than the locals the generated scan_tokens method
+ # already defines itself.
+ def additional_variables
+   built_in = %i(encoder options state states match kind)
+   variables.difference(built_in)
+ end
+
+ # Turns a block into an instance method and returns the Ruby source of a
+ # call to it.
+ #
+ # The method is named after the source line of the block; if that name is
+ # taken, a suffix (_a, _b, ...) is appended. Block parameter names become
+ # the arguments of the generated call and are registered as scanner
+ # variables.
+ def make_callback block
+   base_name = "__callback_line_#{block.source_location.last}"
+
+   callback_name = base_name
+   suffix = 'a'
+   while callbacks.key?(callback_name)
+     callback_name = "#{base_name}_#{suffix}"
+     suffix = suffix.succ
+   end
+
+   callbacks[callback_name] = define_method(callback_name, &block)
+
+   parameter_names = block.parameters.map(&:last)
+   return callback_name if parameter_names.empty?
+
+   variables.merge(parameter_names)
+   "#{callback_name}(#{parameter_names.join(', ')})"
+ end
+
+ # Returns the Ruby source (a String) of the scan_tokens method built from
+ # the collected rules. The generated method:
+ # - takes its start state from options[:state] or @state,
+ # - restores the rule variables via restore_local_variables_code,
+ # - loops until eos?, dispatching on `state` through the `when` branches
+ #   accumulated in @code (an unknown state raises via raise_inspect),
+ # - stores the state in @state when options[:keep_state] is set,
+ # - runs close_groups_code and returns the encoder.
+ def scan_tokens_code
+ <<-"RUBY"
+ def scan_tokens encoder, options
+ state = options[:state] || @state
+
+#{ restore_local_variables_code.chomp.gsub(/^/, ' ' * 3) }
+
+ states = [state]
+
+ until eos?
+ case state
+#{ @code.chomp.gsub(/^/, ' ' * 4) }
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ end
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+#{ close_groups_code.chomp.gsub(/^/, ' ' * 3) }
+
+ encoder
+ end
+ RUBY
+ end
+
+ # Source lines that copy each additional rule variable from its instance
+ # variable into a local of the same name, one assignment per line.
+ def restore_local_variables_code
+   assignments = additional_variables.sort.map do |name|
+     "#{name} = @#{name}"
+   end
+   assignments.join("\n")
+ end
+
+ # Source of the call that runs after the scan loop of the generated
+ # scan_tokens method.
+ def close_groups_code
+   "close_groups(encoder, states)"
+ end
+ end
+
+ # First-call bootstrap: has the class compile its rules into a real
+ # scan_tokens method (define_scan_tokens! uses class_eval on self.class),
+ # then calls scan_tokens again so the generated method handles the input.
+ # NOTE(review): this presumably relies on self.class being a subclass, so
+ # the generated method takes precedence over this one - confirm; otherwise
+ # the second call would re-enter this method.
+ def scan_tokens tokens, options
+ self.class.define_scan_tokens!
+
+ scan_tokens tokens, options
+ end
+
+ protected
+
+ # Scanning starts in the :initial state.
+ def setup
+ @state = :initial
+ end
+
+ # Hook called by the generated scan_tokens after the scan loop with the
+ # encoder and the state stack. Does nothing here; the C scanners in this
+ # change override it to close an open :string group.
+ def close_groups encoder, states
+ # TODO
+ end
+
+ end
+ end
+end
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb
index a240298..61079d5 100644
--- a/lib/coderay/scanners/_map.rb
+++ b/lib/coderay/scanners/_map.rb
@@ -10,6 +10,11 @@ module Scanners
:eruby => :erb,
:irb => :ruby,
:javascript => :java_script,
+ :javascript1 => :java_script1,
+ :javascript2 => :java_script2,
+ :javascript3 => :java_script3,
+ :javascript4 => :java_script4,
+ :javascript5 => :java_script5,
:js => :java_script,
:pascal => :delphi,
:patch => :diff,
diff --git a/lib/coderay/scanners/c2.rb b/lib/coderay/scanners/c2.rb
new file mode 100644
index 0000000..3103e54
--- /dev/null
+++ b/lib/coderay/scanners/c2.rb
@@ -0,0 +1,110 @@
+module CodeRay
+module Scanners
+
+ # Scanner for C.
+ class C2 < RuleBasedScanner
+
+ register_for :c2
+ file_extension 'c'
+
+ KEYWORDS = [
+ 'asm', 'break', 'case', 'continue', 'default', 'do',
+ 'else', 'enum', 'for', 'goto', 'if', 'return',
+ 'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
+ 'restrict', # added in C99
+ ] # :nodoc:
+
+ PREDEFINED_TYPES = [
+ 'int', 'long', 'short', 'char',
+ 'signed', 'unsigned', 'float', 'double',
+ 'bool', 'complex', # added in C99
+ ] # :nodoc:
+
+ PREDEFINED_CONSTANTS = [
+ 'EOF', 'NULL',
+ 'true', 'false', # added in C99
+ ] # :nodoc:
+ DIRECTIVES = [
+ 'auto', 'extern', 'register', 'static', 'void',
+ 'const', 'volatile', # added in C89
+ 'inline', # added in C99
+ ] # :nodoc:
+
+ IDENT_KIND = WordList.new(:ident).
+ add(KEYWORDS, :keyword).
+ add(PREDEFINED_TYPES, :predefined_type).
+ add(DIRECTIVES, :directive).
+ add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
+
+ ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+
+ protected
+
+ state :initial do
+ on check_if(:in_preproc_line), %r/ \s*? \n \s* /x, :space, flag_off(:in_preproc_line), set(:label_expected, :label_expected_before_preproc_line)
+ on %r/ \s+ | \\\n /x, :space
+
+ on %r/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/(?![\/*])=? | \.(?!\d) /x, :operator, set(:label_expected) { |match, case_expected| match =~ /[;\{\}]/ || case_expected && match =~ /:/ }, flag_off(:case_expected)
+
+ on %r/ (?: case | default ) \b /x, :keyword, flag_on(:case_expected), flag_off(:label_expected)
+ on check_if(:label_expected), check_unless(:in_preproc_line), %r/ [A-Za-z_][A-Za-z_0-9]*+ :(?!:) /x, kind { |match|
+ kind = IDENT_KIND[match.chop]
+ kind == :ident ? :label : kind
+ }, set(:label_expected) { |kind| kind == :label }
+ on %r/ [A-Za-z_][A-Za-z_0-9]* /x, kind { |match| IDENT_KIND[match] }, flag_off(:label_expected)
+
+ on %r/(L)?(")/, push(:string), groups(:modifier, :delimiter)
+
+ on %r/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /x, :char, flag_off(:label_expected)
+ on %r/0[xX][0-9A-Fa-f]+/, :hex, flag_off(:label_expected)
+ on %r/(?:0[0-7]+)(?![89.eEfF])/, :octal, flag_off(:label_expected)
+ on %r/(?:\d+)(?![.eEfF])L?L?/, :integer, flag_off(:label_expected)
+ on %r/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, flag_off(:label_expected)
+
+ on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx, :comment
+ on %r/ \# \s* if \s* 0 /x, -> (match) {
+ match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /mx) unless eos?
+ }, :comment
+ on %r/ \# [ \t]* include\b /x, :preprocessor, flag_on(:in_preproc_line), set(:label_expected_before_preproc_line, :label_expected), push_state(:include_expected)
+ on %r/ \# [ \t]* \w* /x, :preprocessor, flag_on(:in_preproc_line), set(:label_expected_before_preproc_line, :label_expected)
+
+ on %r/\$/, :ident
+ end
+
+ state :string do
+ on %r/[^\\\n"]+/, :content
+ on %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mx, :char
+ on %r/"/, :delimiter, pop, flag_off(:label_expected)
+ on %r/ \\ /x, pop, :error, flag_off(:label_expected)
+ on %r/ $ /x, pop, flag_off(:label_expected)
+ end
+
+ state :include_expected do
+ on %r/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/, :include, pop_state
+ on %r/ \s*? \n \s* /x, :space, pop_state
+ on %r/\s+/, :space
+ on %r//, pop_state # TODO: add otherwise method for this
+ end
+
+ protected
+
+ # Initial values of the flags used by the rules above. The scan_tokens
+ # method generated by RuleBasedScanner copies each rule variable from the
+ # instance variable of the same name before scanning.
+ def setup
+ super
+
+ @label_expected = true
+ @case_expected = false
+ @label_expected_before_preproc_line = nil
+ @in_preproc_line = false
+ end
+
+ # Closes the :string group if scanning stopped inside a string literal.
+ def close_groups encoder, states
+   encoder.end_group :string if states.last == :string
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/c3.rb b/lib/coderay/scanners/c3.rb
new file mode 100644
index 0000000..49555ca
--- /dev/null
+++ b/lib/coderay/scanners/c3.rb
@@ -0,0 +1,112 @@
+module CodeRay
+module Scanners
+
+ # Scanner for C.
+ class C3 < RuleBasedScanner
+
+ register_for :c3
+ file_extension 'c'
+
+ KEYWORDS = [
+ 'asm', 'break', 'case', 'continue', 'default', 'do',
+ 'else', 'enum', 'for', 'goto', 'if', 'return',
+ 'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
+ 'restrict', # added in C99
+ ] # :nodoc:
+
+ PREDEFINED_TYPES = [
+ 'int', 'long', 'short', 'char',
+ 'signed', 'unsigned', 'float', 'double',
+ 'bool', 'complex', # added in C99
+ ] # :nodoc:
+
+ PREDEFINED_CONSTANTS = [
+ 'EOF', 'NULL',
+ 'true', 'false', # added in C99
+ ] # :nodoc:
+ DIRECTIVES = [
+ 'auto', 'extern', 'register', 'static', 'void',
+ 'const', 'volatile', # added in C89
+ 'inline', # added in C99
+ ] # :nodoc:
+
+ IDENT_KIND = WordList.new(:ident).
+ add(KEYWORDS, :keyword).
+ add(PREDEFINED_TYPES, :predefined_type).
+ add(DIRECTIVES, :directive).
+ add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
+
+ ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+
+ protected
+
+ state :initial do
+ on check_if(:in_preproc_line), %r/ \s*? \n \s* /x, :space, unset(:in_preproc_line), set(:label_expected, :label_expected_before_preproc_line)
+ on %r/ \s+ | \\\n /x, :space
+
+ on %r/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/(?![\/*])=? | \.(?!\d) /x, :operator, set(:label_expected) { |match, case_expected|
+ match =~ /[;\{\}]/ || case_expected && match =~ /:/
+ }, unset(:case_expected)
+
+ on %r/ (?: case | default ) \b /x, :keyword, set(:case_expected), unset(:label_expected)
+ on check_if(:label_expected), check_unless(:in_preproc_line), %r/ [A-Za-z_][A-Za-z_0-9]*+ :(?!:) /x, kind { |match|
+ kind = IDENT_KIND[match.chop]
+ kind == :ident ? :label : kind
+ }, set(:label_expected) { |kind| kind == :label }
+ on %r/ [A-Za-z_][A-Za-z_0-9]* /x, kind { |match| IDENT_KIND[match] }, unset(:label_expected)
+
+ on %r/(L)?(")/, push(:string), groups(:modifier, :delimiter)
+
+ on %r/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /x, :char, unset(:label_expected)
+ on %r/0[xX][0-9A-Fa-f]+/, :hex, unset(:label_expected)
+ on %r/(?:0[0-7]+)(?![89.eEfF])/, :octal, unset(:label_expected)
+ on %r/(?:\d+)(?![.eEfF])L?L?/, :integer, unset(:label_expected)
+ on %r/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, unset(:label_expected)
+
+ on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx, :comment
+ on %r/ \# \s* if \s* 0 /x, -> (match) {
+ match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /mx) unless eos?
+ }, :comment
+ on %r/ \# [ \t]* include\b /x, :preprocessor, set(:in_preproc_line), set(:label_expected_before_preproc_line, :label_expected), push_state(:include_expected)
+ on %r/ \# [ \t]* \w* /x, :preprocessor, set(:in_preproc_line), set(:label_expected_before_preproc_line, :label_expected)
+
+ on %r/\$/, :ident
+ end
+
+ state :string do
+ on %r/[^\\\n"]+/, :content
+ on %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mx, :char
+ on %r/"/, :delimiter, pop, unset(:label_expected)
+ on %r/ \\ /x, pop, :error, unset(:label_expected)
+ on %r/ $ /x, pop, unset(:label_expected)
+ end
+
+ state :include_expected do
+ on %r/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/, :include, pop_state
+ on %r/ \s*? \n \s* /x, :space, pop_state
+ on %r/\s+/, :space
+ on %r//, pop_state # TODO: add otherwise method for this
+ end
+
+ protected
+
+ # Initial values of the flags used by the rules above. The scan_tokens
+ # method generated by RuleBasedScanner copies each rule variable from the
+ # instance variable of the same name before scanning.
+ def setup
+ super
+
+ @label_expected = true
+ @case_expected = false
+ @label_expected_before_preproc_line = nil
+ @in_preproc_line = false
+ end
+
+ # Closes the :string group if scanning stopped inside a string literal.
+ def close_groups encoder, states
+   encoder.end_group :string if states.last == :string
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/c4.rb b/lib/coderay/scanners/c4.rb
new file mode 100644
index 0000000..ff67e49
--- /dev/null
+++ b/lib/coderay/scanners/c4.rb
@@ -0,0 +1,126 @@
+module CodeRay
+module Scanners
+
+ # Scanner for C.
+ class C4 < StateBasedScanner
+
+ register_for :c4
+ file_extension 'c'
+
+ KEYWORDS = [
+ 'asm', 'break', 'case', 'continue', 'default', 'do',
+ 'else', 'enum', 'for', 'goto', 'if', 'return',
+ 'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
+ 'restrict', # added in C99
+ ] # :nodoc:
+
+ PREDEFINED_TYPES = [
+ 'int', 'long', 'short', 'char',
+ 'signed', 'unsigned', 'float', 'double',
+ 'bool', 'complex', # added in C99
+ ] # :nodoc:
+
+ PREDEFINED_CONSTANTS = [
+ 'EOF', 'NULL',
+ 'true', 'false', # added in C99
+ ] # :nodoc:
+ DIRECTIVES = [
+ 'auto', 'extern', 'register', 'static', 'void',
+ 'const', 'volatile', # added in C89
+ 'inline', # added in C99
+ ] # :nodoc:
+
+ IDENT_KIND = WordList.new(:ident).
+ add(KEYWORDS, :keyword).
+ add(PREDEFINED_TYPES, :predefined_type).
+ add(DIRECTIVES, :directive).
+ add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
+
+ ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+
+ protected
+
+ state :initial do
+ check in_preproc_line? do
+ skip %r/ \s*? \n \s* /x, :space do
+ unset :in_preproc_line
+ expect :label if label_expected_before_preproc_line?
+ end
+ end
+
+ skip %r/ \s+ | \\\n /x, :space
+
+ on %r/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/(?![\/*])=? | \.(?!\d) /x, :operator do |match, case_expected|
+ expect :label if match =~ /[;\{\}]/ || expected?(:case) && match =~ /:/
+ end
+
+ on %r/ (?: case | default ) \b /x, :keyword do
+ expect :case
+ end
+
+ check label_expected?, !in_preproc_line? do
+ on %r/ [A-Za-z_][A-Za-z_0-9]*+ :(?!:) /x, -> match {
+ kind = IDENT_KIND[match.chop]
+ kind == :ident ? :label : kind
+ } do |kind|
+ expect :label if kind == :label
+ end
+ end
+
+ on %r/ [A-Za-z_][A-Za-z_0-9]* /x, IDENT_KIND
+
+ on %r/(L)?(")/, push(:string), groups(:modifier, :delimiter)
+
+ on %r/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /x, :char
+ on %r/0[xX][0-9A-Fa-f]+/, :hex
+ on %r/(?:0[0-7]+)(?![89.eEfF])/, :octal
+ on %r/(?:\d+)(?![.eEfF])L?L?/, :integer
+ on %r/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float
+
+ skip %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx, :comment
+ on %r/ \# \s* if \s* 0 /x, -> (match) {
+ match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /mx) unless eos?
+ }, :comment
+ on %r/ \# [ \t]* include\b /x, :preprocessor, set(:in_preproc_line), set(:label_expected_before_preproc_line, :label_expected), push(:include)
+ on %r/ \# [ \t]* \w* /x, :preprocessor, set(:in_preproc_line), set(:label_expected_before_preproc_line, :label_expected)
+
+ on %r/\$/, :ident
+ end
+
+ group_state :string do
+ on %r/[^\\\n"]+/, :content
+ on %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mx, :char
+ on %r/"/, :delimiter, pop
+ on %r/ \\ /x, pop, :error
+ on %r/ $ /x, pop
+ end
+
+ state :include do
+ on %r/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/, :include, pop
+ on %r/ \s*? \n \s* /x, :space, pop
+ on %r/\s+/, :space
+ otherwise pop
+ end
+
+ protected
+
+ # Initial flag values for this scanner.
+ # NOTE(review): presumably StateBasedScanner reads these instance variables
+ # the way RuleBasedScanner does - confirm against state_based_scanner.rb.
+ def setup
+ super
+
+ @label_expected = true
+ @case_expected = false
+ @label_expected_before_preproc_line = nil
+ @in_preproc_line = false
+ end
+
+ # Closes the :string group if the last state on the stack is :string.
+ def close_groups encoder, states
+   encoder.end_group :string if states.last == :string
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/css2.rb b/lib/coderay/scanners/css2.rb
new file mode 100644
index 0000000..0c0d4a0
--- /dev/null
+++ b/lib/coderay/scanners/css2.rb
@@ -0,0 +1,90 @@
+module CodeRay
+module Scanners
+
+ class CSS2 < RuleBasedScanner
+
+ register_for :css2
+
+ KINDS_NOT_LOC = [
+ :comment,
+ :class, :pseudo_class, :tag,
+ :id, :directive,
+ :key, :value, :operator, :color, :float, :string,
+ :error, :important, :type,
+ ] # :nodoc:
+
+ module RE # :nodoc:
+ Hex = /[0-9a-fA-F]/
+ Unicode = /\\#{Hex}{1,6}\b/ # differs from standard because it allows uppercase hex too
+ Escape = /#{Unicode}|\\[^\n0-9a-fA-F]/
+ NMChar = /[-_a-zA-Z0-9]/
+ NMStart = /[_a-zA-Z]/
+ String1 = /(")((?:[^\n\\"]+|\\\n|#{Escape})+)?(")?/ # TODO: buggy regexp
+ String2 = /(')((?:[^\n\\']+|\\\n|#{Escape})+)?(')?/ # TODO: buggy regexp
+ String = /#{String1}|#{String2}/
+
+ HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/
+
+ Num = /-?(?:[0-9]*\.[0-9]+|[0-9]+)n?/
+ Name = /#{NMChar}+/
+ Ident = /-?#{NMStart}#{NMChar}*/
+ AtKeyword = /@#{Ident}/
+ Percentage = /#{Num}%/
+
+ reldimensions = %w[em ex px]
+ absdimensions = %w[in cm mm pt pc]
+ Unit = Regexp.union(*(reldimensions + absdimensions + %w[s dpi dppx deg]))
+
+ Dimension = /#{Num}#{Unit}/
+
+ Function = /((?:url|alpha|attr|counters?)\()((?:[^)\n]|\\\))+)?(\))?/
+
+ Id = /(?!#{HexColor}\b(?!-))##{Name}/
+ Class = /\.#{Name}/
+ PseudoClass = /::?#{Ident}/
+ AttributeSelector = /(\[)([^\]]+)?(\])?/
+ end
+
+ state :initial do
+ on %r/\s+/, :space
+
+ on check_if(:block), check_if(:value_expected), %r/(?>#{RE::Ident})(?!\()/x, :value
+ on check_if(:block), %r/(?>#{RE::Ident})(?!\()/x, :key
+
+ on check_unless(:block), %r/(?>#{RE::Ident})(?!\()|\*/x, :tag
+ on check_unless(:block), RE::Class, :class
+ on check_unless(:block), RE::Id, :id
+ on check_unless(:block), RE::PseudoClass, :pseudo_class
+ # TODO: Improve highlighting inside of attribute selectors.
+ on check_unless(:block), RE::AttributeSelector, groups(:operator, :attribute_name, :operator)
+ on check_unless(:block), %r/(@media)(\s+)?(#{RE::Ident})?(\s+)?(\{)?/, groups(:directive, :space, :type, :space, :operator)
+
+ on %r/\/\*(?:.*?\*\/|\z)/m, :comment
+ on %r/\{/, :operator, flag_off(:value_expected), flag_on(:block)
+ on %r/\}/, :operator, flag_off(:value_expected), flag_off(:block)
+ on RE::String1, push(:string), groups(:delimiter, :content, :delimiter), pop
+ on RE::String2, push(:string), groups(:delimiter, :content, :delimiter), pop
+ on RE::Function, push(:function), groups(:delimiter, :content, :delimiter), pop
+ on %r/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/x, :float
+ on RE::HexColor, :color
+ on %r/! *important/, :important
+ on %r/(?:rgb|hsl)a?\([^()\n]*\)?/, :color
+ on RE::AtKeyword, :directive
+ on %r/:/, :operator, flag_on(:value_expected)
+ on %r/;/, :operator, flag_off(:value_expected)
+ on %r/ [+>~,.=()\/] /x, :operator
+ end
+
+ protected
+
+ # Initial values of the flags used by the rules above: the scanner starts
+ # outside of a block and is not expecting a value.
+ def setup
+ super
+
+ @value_expected = false
+ @block = false
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/java_script1.rb b/lib/coderay/scanners/java_script1.rb
new file mode 100644
index 0000000..4fe59ba
--- /dev/null
+++ b/lib/coderay/scanners/java_script1.rb
@@ -0,0 +1,238 @@
+# like java_script.rb
+# - but uses instance instead of local variables for flags
+module CodeRay
+module Scanners
+
+ # Scanner for JavaScript.
+ #
+ # Aliases: +ecmascript+, +ecma_script+, +javascript+
+ class JavaScript1 < Scanner
+
+ register_for :java_script1
+ file_extension 'js'
+
+ # The actual JavaScript keywords.
+ KEYWORDS = %w[
+ break case catch continue default delete do else
+ finally for function if in instanceof new
+ return switch throw try typeof var void while with
+ ] # :nodoc:
+ PREDEFINED_CONSTANTS = %w[
+ false null true undefined NaN Infinity
+ ] # :nodoc:
+
+ MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4
+
+ KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+ case delete in instanceof new return throw typeof with
+ ] # :nodoc:
+
+ # Reserved for future use.
+ RESERVED_WORDS = %w[
+ abstract boolean byte char class debugger double enum export extends
+ final float goto implements import int interface long native package
+ private protected public short static super synchronized throws transient
+ volatile
+ ] # :nodoc:
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(MAGIC_VARIABLES, :local_variable).
+ add(KEYWORDS, :keyword) # :nodoc:
+
+ ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+ REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
+ STRING_CONTENT_PATTERN = {
+ "'" => /[^\\']+/,
+ '"' => /[^\\"]+/,
+ '/' => /[^\\\/]+/,
+ } # :nodoc:
+ KEY_CHECK_PATTERN = {
+ "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
+ '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
+ } # :nodoc:
+
+ protected
+
+ # Scanning starts in the :initial state. scan_tokens may later replace
+ # @state with a [state, string_delimiter] pair when :keep_state is set.
+ def setup
+ @state = :initial
+ end
+
+ # Tokenizes the input into the encoder with a hand-written state machine.
+ # States: :initial (code), :string / :regexp / :key (inside a delimited
+ # literal) and :open_multi_line_comment. The value/key/function
+ # expectations are kept in instance variables. Returns the encoder.
+ def scan_tokens encoder, options
+
+ # The start state is either a bare Symbol or a [state, delimiter] pair;
+ # with a bare Symbol the delimiter becomes nil.
+ state, @string_delimiter = options[:state] || @state
+ if @string_delimiter
+ # resuming inside a literal: reopen its group on this encoder
+ encoder.begin_group state
+ end
+
+ @value_expected = true
+ @key_expected = false
+ @function_expected = false
+
+ until eos?
+
+ case state
+
+ when :initial
+
+ if match = scan(/ \s+ | \\\n /x)
+ # a line break makes a value possible again
+ @value_expected = true if !@value_expected && match.index(?\n)
+ encoder.text_token match, :space
+
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx)
+ @value_expected = true
+ encoder.text_token match, :comment
+ # the empty capture group only participates when a /* comment runs to
+ # the end of the input without being closed
+ state = :open_multi_line_comment if self[1]
+
+ elsif check(/\.?\d/)
+ @key_expected = @value_expected = false
+ if match = scan(/0[xX][0-9A-Fa-f]+/)
+ encoder.text_token match, :hex
+ elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
+ encoder.text_token match, :octal
+ elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ encoder.text_token match, :float
+ elsif match = scan(/\d+/)
+ encoder.text_token match, :integer
+ end
+
+ elsif @value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
+ # TODO: scan over nested tags
+ # inline XML where a value is expected: delegate to the XML scanner
+ xml_scanner.tokenize match, :tokens => encoder
+ @value_expected = false
+
+ elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
+ @value_expected = true
+ last_operator = match[-1]
+ # an object key can follow `{` or `,`
+ @key_expected = (last_operator == ?{) || (last_operator == ?,)
+ @function_expected = false
+ encoder.text_token match, :operator
+
+ elsif match = scan(/ [)\]}]+ /x)
+ @function_expected = @key_expected = @value_expected = false
+ encoder.text_token match, :operator
+
+ elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
+ kind = IDENT_KIND[match]
+ @value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
+ # TODO: labels
+ # refine plain identifiers by their context
+ if kind == :ident
+ if match.index(?$) # $ allowed inside an identifier
+ kind = :predefined
+ elsif @function_expected
+ kind = :function
+ elsif check(/\s*[=:]\s*function\b/)
+ kind = :function
+ elsif @key_expected && check(/\s*:/)
+ kind = :key
+ end
+ end
+ @function_expected = (kind == :keyword) && (match == 'function')
+ @key_expected = false
+ encoder.text_token match, kind
+
+ elsif match = scan(/["']/)
+ # a quoted literal followed by `:` in key position is an object key
+ if @key_expected && check(KEY_CHECK_PATTERN[match])
+ state = :key
+ else
+ state = :string
+ end
+ encoder.begin_group state
+ @string_delimiter = match
+ encoder.text_token match, :delimiter
+
+ elsif @value_expected && (match = scan(/\//))
+ # `/` where a value is expected starts a regexp literal
+ encoder.begin_group :regexp
+ state = :regexp
+ @string_delimiter = '/'
+ encoder.text_token match, :delimiter
+
+ elsif match = scan(/ \/ /x)
+ # otherwise `/` is the division operator
+ @value_expected = true
+ @key_expected = false
+ encoder.text_token match, :operator
+
+ else
+ encoder.text_token getch, :error
+
+ end
+
+ when :string, :regexp, :key
+ if match = scan(STRING_CONTENT_PATTERN[@string_delimiter])
+ encoder.text_token match, :content
+ elsif match = scan(/["'\/]/)
+ # closing delimiter: emit it (plus regexp modifiers) and leave the group
+ encoder.text_token match, :delimiter
+ if state == :regexp
+ modifiers = scan(/[gim]+/)
+ encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
+ end
+ encoder.end_group state
+ @string_delimiter = nil
+ @key_expected = @value_expected = false
+ state = :initial
+ elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
+ # in single-quoted literals only \\ and \' are emitted as :char
+ if @string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
+ encoder.text_token match, :content
+ else
+ encoder.text_token match, :char
+ end
+ elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ | $ /x)
+ # a lone backslash or an end of line: close the group and return to
+ # :initial; a consumed backslash is emitted as :error
+ encoder.end_group state
+ encoder.text_token match, :error unless match.empty?
+ @string_delimiter = nil
+ @key_expected = @value_expected = false
+ state = :initial
+ else
+ raise_inspect "else case #{@string_delimiter} reached; %p not handled." % peek(1), encoder
+ end
+
+ when :open_multi_line_comment
+ # continue an unterminated /* comment: up to the closing */ if there is
+ # one, otherwise everything that is left
+ if match = scan(%r! .*? \*/ !mx)
+ state = :initial
+ else
+ match = scan(%r! .+ !mx)
+ end
+ @value_expected = true
+ encoder.text_token match, :comment if match
+
+ else
+ #:nocov:
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ #:nocov:
+
+ end
+
+ end
+
+ if options[:keep_state]
+ # remember the state together with the open delimiter
+ @state = state, @string_delimiter
+ end
+
+ # NOTE(review): an open :key group is not closed here - presumably
+ # unreachable at end of input; confirm.
+ if [:string, :regexp].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+
+ protected
+
+ # Resets this scanner and, if one has been created, the nested XML scanner.
+ def reset_instance
+   super
+   return unless defined?(@xml_scanner)
+
+   @xml_scanner.reset
+ end
+
+ # Lazily created XML scanner, built once and reused for inline XML.
+ def xml_scanner
+   return @xml_scanner if @xml_scanner
+
+   @xml_scanner = CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/java_script2.rb b/lib/coderay/scanners/java_script2.rb
new file mode 100644
index 0000000..42fa640
--- /dev/null
+++ b/lib/coderay/scanners/java_script2.rb
@@ -0,0 +1,240 @@
+# like java_script.rb
+# - but uses instance instead of local variables for flags
+# - but uses the same rule logic as java_script4.rb
+# - also uses states array push/pop
+module CodeRay
+module Scanners
+
+ # Scanner for JavaScript.
+ #
+ # Aliases: +ecmascript+, +ecma_script+, +javascript+
+ class JavaScript2 < Scanner
+
+ register_for :java_script2
+ file_extension 'js'
+
+ # The actual JavaScript keywords.
+ KEYWORDS = %w[
+ break case catch continue default delete do else
+ finally for function if in instanceof new
+ return switch throw try typeof var void while with
+ ] # :nodoc:
+ PREDEFINED_CONSTANTS = %w[
+ false null true undefined NaN Infinity
+ ] # :nodoc:
+
+ MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4
+
+ KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+ case delete in instanceof new return throw typeof with
+ ] # :nodoc:
+
+ # Reserved for future use.
+ RESERVED_WORDS = %w[
+ abstract boolean byte char class debugger double enum export extends
+ final float goto implements import int interface long native package
+ private protected public short static super synchronized throws transient
+ volatile
+ ] # :nodoc:
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(MAGIC_VARIABLES, :local_variable).
+ add(KEYWORDS, :keyword) # :nodoc:
+
+ ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+ REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
+ STRING_CONTENT_PATTERN = {
+ "'" => /[^\\']+/,
+ '"' => /[^\\"]+/,
+ '/' => /[^\\\/]+/,
+ } # :nodoc:
+ KEY_CHECK_PATTERN = {
+ "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
+ '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
+ } # :nodoc:
+
+ protected
+
+ # Sets the stored scanner state to :initial.
+ def setup
+ @state = :initial
+ end
+
+ def scan_tokens encoder, options
+
+ state, @string_delimiter = options[:state] || @state
+ if @string_delimiter
+ encoder.begin_group state
+ end
+
+ @value_expected = true
+ @key_expected = false
+ @function_expected = false
+
+ states = [state]
+
+ until eos?
+
+ case state
+
+ when :initial
+
+ if match = scan(/ \s+ | \\\n /x)
+ encoder.text_token match, :space
+ @value_expected = true if !@value_expected && match.index(?\n)
+
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx)
+ encoder.text_token match, :comment
+ @value_expected = true
+ # state = :open_multi_line_comment if self[1]
+
+ elsif check(/\.?\d/)
+ if match = scan(/0[xX][0-9A-Fa-f]+/)
+ encoder.text_token match, :hex
+ elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
+ encoder.text_token match, :octal
+ elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ encoder.text_token match, :float
+ elsif match = scan(/\d+/)
+ encoder.text_token match, :integer
+ end
+ @key_expected = @value_expected = false
+
+ elsif @value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
+ # TODO: scan over nested tags
+ xml_scanner.tokenize match, :tokens => encoder
+ @value_expected = false
+
+ elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
+ encoder.text_token match, :operator
+ @value_expected = true
+ @key_expected = /[{,]$/ === match
+ @function_expected = false
+
+ elsif match = scan(/ [)\]}]+ /x)
+ encoder.text_token match, :operator
+ @function_expected = @key_expected = @value_expected = false
+
+ elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
+ kind = IDENT_KIND[match]
+ @value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
+ # TODO: labels
+ if kind == :ident
+ if match.index(?$) # $ allowed inside an identifier
+ kind = :predefined
+ elsif @function_expected
+ kind = :function
+ elsif check(/\s*[=:]\s*function\b/)
+ kind = :function
+ elsif @key_expected && check(/\s*:/)
+ kind = :key
+ end
+ end
+ encoder.text_token match, kind
+ @function_expected = (kind == :keyword) && (match == 'function')
+ @key_expected = false
+
+ elsif match = scan(/["']/)
+ state = (@key_expected && check(KEY_CHECK_PATTERN[match])) ? :key : :string
+ states << state
+ encoder.begin_group state
+ @string_delimiter = match
+ encoder.text_token match, :delimiter
+
+ elsif @value_expected && (match = scan(/\//))
+ state = :regexp
+ states << state
+ encoder.begin_group state
+ @string_delimiter = '/'
+ encoder.text_token match, :delimiter
+
+ elsif match = scan(/ \/ /x)
+ @value_expected = true
+ @key_expected = false
+ encoder.text_token match, :operator
+
+ else
+ encoder.text_token getch, :error
+
+ end
+
+ when :string, :regexp, :key
+ if match = scan(STRING_CONTENT_PATTERN[@string_delimiter])
+ encoder.text_token match, :content
+ elsif match = scan(/["'\/]/)
+ encoder.text_token match, :delimiter
+ if match == '/'
+ modifiers = scan(/[gim]+/)
+ encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
+ end
+ @string_delimiter = nil
+ @key_expected = @value_expected = false
+ encoder.end_group states.pop
+ state = states.last
+ elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
+ if @string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
+ encoder.text_token match, :content
+ else
+ encoder.text_token match, :char
+ end
+ elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ | $ /x)
+ encoder.end_group states.pop
+ state = states.last
+ encoder.text_token match, :error unless match.empty?
+ @string_delimiter = nil
+ @key_expected = @value_expected = false
+ else
+ raise_inspect "else case #{@string_delimiter} reached; %p not handled." % peek(1), encoder
+ end
+
+ # when :open_multi_line_comment
+ # if match = scan(%r! .*? \*/ !mx)
+ # states.pop
+ # state = states.last
+ # else
+ # match = scan(%r! .+ !mx)
+ # end
+ # @value_expected = true
+ # encoder.text_token match, :comment if match
+
+ else
+ #:nocov:
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ #:nocov:
+
+ end
+
+ end
+
+ if options[:keep_state]
+ @state = state, @string_delimiter
+ end
+
+ if [:string, :regexp].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+
+ protected
+
+ def reset_instance
+ super
+ @xml_scanner.reset if defined? @xml_scanner
+ end
+
+ def xml_scanner
+ @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/java_script3.rb b/lib/coderay/scanners/java_script3.rb
new file mode 100644
index 0000000..9492967
--- /dev/null
+++ b/lib/coderay/scanners/java_script3.rb
@@ -0,0 +1,239 @@
+# like java_script.rb
+# - but uses the same rule logic as java_script4.rb
+# - also uses states array push/pop
+module CodeRay
+module Scanners
+
+ # Scanner for JavaScript.
+ #
+ # Aliases: +ecmascript+, +ecma_script+, +javascript+
+ class JavaScript3 < Scanner
+
+ register_for :java_script3
+ file_extension 'js'
+
+ # The actual JavaScript keywords.
+ KEYWORDS = %w[
+ break case catch continue default delete do else
+ finally for function if in instanceof new
+ return switch throw try typeof var void while with
+ ] # :nodoc:
+ PREDEFINED_CONSTANTS = %w[
+ false null true undefined NaN Infinity
+ ] # :nodoc:
+
+ MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4
+
+ KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+ case delete in instanceof new return throw typeof with
+ ] # :nodoc:
+
+ # Reserved for future use.
+ RESERVED_WORDS = %w[
+ abstract boolean byte char class debugger double enum export extends
+ final float goto implements import int interface long native package
+ private protected public short static super synchronized throws transient
+ volatile
+ ] # :nodoc:
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(MAGIC_VARIABLES, :local_variable).
+ add(KEYWORDS, :keyword) # :nodoc:
+
+ ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+ REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
+ STRING_CONTENT_PATTERN = {
+ "'" => /[^\\']+/,
+ '"' => /[^\\"]+/,
+ '/' => /[^\\\/]+/,
+ } # :nodoc:
+ KEY_CHECK_PATTERN = {
+ "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
+ '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
+ } # :nodoc:
+
+ protected
+
+ # Sets the stored scanner state to :initial.
+ def setup
+ @state = :initial
+ end
+
+ def scan_tokens encoder, options
+
+ state, string_delimiter = options[:state] || @state
+ if string_delimiter
+ encoder.begin_group state
+ end
+
+ value_expected = true
+ key_expected = false
+ function_expected = false
+
+ states = [state]
+
+ until eos?
+
+ case state
+
+ when :initial
+
+ if match = scan(/ \s+ | \\\n /x)
+ encoder.text_token match, :space
+ value_expected = true if !value_expected && match.index(?\n)
+
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx)
+ encoder.text_token match, :comment
+ value_expected = true
+ # state = :open_multi_line_comment if self[1]
+
+ elsif check(/\.?\d/)
+ if match = scan(/0[xX][0-9A-Fa-f]+/)
+ encoder.text_token match, :hex
+ elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
+ encoder.text_token match, :octal
+ elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ encoder.text_token match, :float
+ elsif match = scan(/\d+/)
+ encoder.text_token match, :integer
+ end
+ key_expected = value_expected = false
+
+ elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
+ # TODO: scan over nested tags
+ xml_scanner.tokenize match, :tokens => encoder
+ value_expected = false
+
+ elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
+ encoder.text_token match, :operator
+ value_expected = true
+ key_expected = /[{,]$/ === match
+ function_expected = false
+
+ elsif match = scan(/ [)\]}]+ /x)
+ encoder.text_token match, :operator
+ function_expected = key_expected = value_expected = false
+
+ elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
+ kind = IDENT_KIND[match]
+ value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
+ # TODO: labels
+ if kind == :ident
+ if match.index(?$) # $ allowed inside an identifier
+ kind = :predefined
+ elsif function_expected
+ kind = :function
+ elsif check(/\s*[=:]\s*function\b/)
+ kind = :function
+ elsif key_expected && check(/\s*:/)
+ kind = :key
+ end
+ end
+ encoder.text_token match, kind
+ function_expected = (kind == :keyword) && (match == 'function')
+ key_expected = false
+
+ elsif match = scan(/["']/)
+ state = (key_expected && check(KEY_CHECK_PATTERN[match])) ? :key : :string
+ states << state
+ encoder.begin_group state
+ string_delimiter = match
+ encoder.text_token match, :delimiter
+
+ elsif value_expected && (match = scan(/\//))
+ state = :regexp
+ states << state
+ encoder.begin_group state
+ string_delimiter = '/'
+ encoder.text_token match, :delimiter
+
+ elsif match = scan(/ \/ /x)
+ value_expected = true
+ key_expected = false
+ encoder.text_token match, :operator
+
+ else
+ encoder.text_token getch, :error
+
+ end
+
+ when :string, :regexp, :key
+ if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
+ encoder.text_token match, :content
+ elsif match = scan(/["'\/]/)
+ encoder.text_token match, :delimiter
+ if match == '/'
+ modifiers = scan(/[gim]+/)
+ encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
+ end
+ string_delimiter = nil
+ key_expected = value_expected = false
+ encoder.end_group states.pop
+ state = states.last
+ elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
+ if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
+ encoder.text_token match, :content
+ else
+ encoder.text_token match, :char
+ end
+ elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ | $ /x)
+ encoder.end_group states.pop
+ state = states.last
+ encoder.text_token match, :error unless match.empty?
+ string_delimiter = nil
+ key_expected = value_expected = false
+ else
+ raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder
+ end
+
+ # when :open_multi_line_comment
+ # if match = scan(%r! .*? \*/ !mx)
+ # states.pop
+ # state = states.last
+ # else
+ # match = scan(%r! .+ !mx)
+ # end
+ # value_expected = true
+ # encoder.text_token match, :comment if match
+
+ else
+ #:nocov:
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ #:nocov:
+
+ end
+
+ end
+
+ if options[:keep_state]
+ @state = state, string_delimiter
+ end
+
+ if [:string, :regexp].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+
+ protected
+
+ def reset_instance
+ super
+ @xml_scanner.reset if defined? @xml_scanner
+ end
+
+ def xml_scanner
+ @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/java_script4.rb b/lib/coderay/scanners/java_script4.rb
new file mode 100644
index 0000000..7899a8d
--- /dev/null
+++ b/lib/coderay/scanners/java_script4.rb
@@ -0,0 +1,400 @@
+# TODO: string_delimiter should be part of the state: push(:regexp, '/'), check_if -> (state, delimiter) { … }
+module CodeRay
+module Scanners
+
+ class JavaScript4RuleBasedScanner < Scanner
+
+ CheckIf = Struct.new :condition
+
+ class << self
+ attr_accessor :states
+
+ def state *names, &block
+ @@code ||= ""
+
+ @@code << "when #{names.map(&:inspect).join(', ')}\n"
+
+ @@first = true
+ instance_eval(&block)
+ @@code << " else\n"
+ # @@code << " raise 'no match for #{names.map(&:inspect).join(', ')}'\n"
+ @@code << " encoder.text_token getch, :error\n"
+ @@code << " end\n"
+ @@code << " \n"
+ end
+
+ def on? pattern
+ pattern_expression = pattern.inspect
+ @@code << " #{'els' unless @@first}if check(#{pattern_expression})\n"
+
+ @@first = true
+ yield
+ @@code << " end\n"
+
+ @@first = false
+ end
+
+ # Adds one rule to the state currently being defined by appending Ruby
+ # source for an if/elsif branch to @@code.
+ #
+ # Arguments, in order:
+ # * zero or more CheckIf preconditions (see check_if),
+ # * the pattern: a Regexp, or a Proc whose result is passed to scan,
+ # * the actions: a Symbol (emit the match as a token of that kind),
+ #   the result of push / pop, or a Proc taking 0..2 arguments
+ #   (match, encoder).
+ #
+ # Raises a RuntimeError for patterns, preconditions or actions it does not
+ # know how to translate.
+ def on *pattern_and_actions
+ # Everything before the first non-CheckIf argument is a precondition.
+ if index = pattern_and_actions.find_index { |item| !item.is_a?(CheckIf) }
+ preconditions = pattern_and_actions[0..index - 1] if index > 0
+ pattern = pattern_and_actions[index] or raise 'I need a pattern!'
+ actions = pattern_and_actions[index + 1..-1] or raise 'I need actions!'
+ end
+
+ # Build the "cond && cond && " prefix that guards the scan call.
+ precondition_expression = ''
+ if preconditions
+ for precondition in preconditions
+ case precondition
+ when CheckIf
+ case precondition.condition
+ when Proc
+ callback = make_callback(precondition.condition)
+ case precondition.condition.arity
+ when 0
+ arguments = ''
+ when 1
+ arguments = '(state)'
+ else
+ raise "I got %p arguments for precondition: %p, but I only know how to evaluate 0..1" % [precondition.condition.arity, callback]
+ end
+ precondition_expression << "#{callback}#{arguments} && "
+ when Symbol
+ # The symbol is inserted verbatim, e.g. :@value_expected.
+ precondition_expression << "#{precondition.condition} && "
+ else
+ raise "I don't know how to evaluate this check_if precondition: %p" % [precondition.condition]
+ end
+ else
+ raise "I don't know how to evaluate this precondition: %p" % [precondition]
+ end
+ end
+ end
+
+ case pattern
+ # when String
+ # pattern_expression = pattern
+ when Regexp
+ pattern_expression = pattern.inspect
+ when Proc
+ # The callback is called without arguments; its result is scanned for.
+ pattern_expression = make_callback(pattern).to_s
+ else
+ raise "I don't know how to evaluate this pattern: %p" % [pattern]
+ end
+
+ # The first rule of a chain opens with `if`, later ones with `elsif`.
+ @@code << " #{'els' unless @@first}if #{precondition_expression}match = scan(#{pattern_expression})\n"
+
+ # Actions are emitted in the order they were given.
+ for action in actions
+ case action
+ when Symbol
+ @@code << " p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG
+ @@code << " encoder.text_token match, #{action.inspect}\n"
+ when Array
+ case action.first
+ when :push
+ case action.last
+ when Symbol
+ @@code << " p 'push %p' % [#{action.last.inspect}]\n" if $DEBUG
+ @@code << " state = #{action.last.inspect}\n"
+ when Proc
+ # The new state is computed at scan time by the callback.
+ callback = make_callback(action.last)
+ case action.last.arity
+ when 0
+ arguments = ''
+ when 1
+ arguments = '(match)'
+ else
+ raise "I got %p arguments for push: %p, but I only know how to evaluate 0..1" % [action.last.arity, callback]
+ end
+ @@code << " p 'push %p' % [#{callback}]\n" if $DEBUG
+ @@code << " state = #{callback}#{arguments}\n"
+ else
+ raise "I don't know how to evaluate this push state: %p" % [action.last]
+ end
+ @@code << " states << state\n"
+ @@code << " encoder.begin_group state\n"
+ when :pop
+ @@code << " p 'pop %p' % [states.last]\n" if $DEBUG
+ @@code << " encoder.end_group states.pop\n"
+ @@code << " state = states.last\n"
+ end
+ when Proc
+ callback = make_callback(action)
+ case action.arity
+ when 0
+ arguments = ''
+ when 1
+ arguments = '(match)'
+ when 2
+ arguments = '(match, encoder)'
+ else
+ raise "I got %p arguments for action: %p, but I only know how to evaluate 0..2" % [action.arity, callback]
+ end
+ @@code << " p 'calling %p'\n" % [callback] if $DEBUG
+ @@code << " #{callback}#{arguments}\n"
+
+ else
+ raise "I don't know how to evaluate this action: %p" % [action]
+ end
+ end
+
+ @@first = false
+ end
+
+ def push state = nil, &block
+ raise 'push requires a state or a block; got nothing' unless state || block
+ [:push, state || block]
+ end
+
+ # Builds the pop action understood by +on+.
+ def pop
+ [:pop]
+ end
+
+ # Wraps +value+ (or, when it is nil/false, the given block) in a CheckIf
+ # so that +on+ treats it as a precondition.
+ def check_if value = nil, &callback
+ CheckIf.new value || callback
+ end
+
+ protected
+
+ def make_callback block
+ @callbacks ||= {}
+
+ base_name = "__callback_line_#{block.source_location.last}"
+ name = base_name
+ counter = 'a'
+ while @callbacks.key?(name)
+ name = "#{base_name}_#{counter}"
+ counter.succ!
+ end
+
+ @callbacks[name] = define_method(name, &block)
+ end
+ end
+ end
+
+ # Scanner for JavaScript.
+ #
+ # Aliases: +ecmascript+, +ecma_script+, +javascript+
+ class JavaScript4 < JavaScript4RuleBasedScanner
+
+ register_for :java_script4
+ file_extension 'js'
+
+ # The actual JavaScript keywords.
+ KEYWORDS = %w[
+ break case catch continue default delete do else
+ finally for function if in instanceof new
+ return switch throw try typeof var void while with
+ ] # :nodoc:
+ PREDEFINED_CONSTANTS = %w[
+ false null true undefined NaN Infinity
+ ] # :nodoc:
+
+ MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4
+
+ KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+ case delete in instanceof new return throw typeof with
+ ] # :nodoc:
+
+ # Reserved for future use.
+ RESERVED_WORDS = %w[
+ abstract boolean byte char class debugger double enum export extends
+ final float goto implements import int interface long native package
+ private protected public short static super synchronized throws transient
+ volatile
+ ] # :nodoc:
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(MAGIC_VARIABLES, :local_variable).
+ add(KEYWORDS, :keyword) # :nodoc:
+
+ ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+ REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
+ STRING_CONTENT_PATTERN = {
+ "'" => /[^\\']+/,
+ '"' => /[^\\"]+/,
+ '/' => /[^\\\/]+/,
+ } # :nodoc:
+ KEY_CHECK_PATTERN = {
+ "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
+ '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
+ } # :nodoc:
+
+ # Rules for the :initial state. Each +on+ names a pattern followed by the
+ # token kind and/or callbacks to run on a match; the rules end up as one
+ # if/elsif chain in declaration order.
+ state :initial do
+ # on %r/ [ \t]* \n \s* /x, :space, -> { @value_expected = true }
+ # on %r/ [ \t]+ | \\\n /x, :space
+ on %r/ \s+ | \\\n /x, :space, -> (match) { @value_expected = true if !@value_expected && match.index(?\n) }
+
+ on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx, :comment, -> { @value_expected = true }
+ # state = :open_multi_line_comment if self[1]
+
+ # Number literals: only tried when the input starts with a digit or with
+ # a dot followed by a digit.
+ on? %r/\.?\d/ do
+ on %r/0[xX][0-9A-Fa-f]+/, :hex, -> { @key_expected = @value_expected = false }
+ on %r/(?>0[0-7]+)(?![89.eEfF])/, :octal, -> { @key_expected = @value_expected = false }
+ on %r/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, -> { @key_expected = @value_expected = false }
+ on %r/\d+/, :integer, -> { @key_expected = @value_expected = false }
+ end
+
+ # Inline XML is handed to the XML scanner, but only where a value may start.
+ on check_if(:@value_expected), %r/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim, -> (match, encoder) do
+ # TODO: scan over nested tags
+ xml_scanner.tokenize match, :tokens => encoder
+ @value_expected = false
+ end
+
+ on %r/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x, :operator, -> (match) do
+ @value_expected = true
+ @key_expected = /[{,]$/ === match
+ @function_expected = false
+ end
+
+ on %r/ [)\]}]+ /x, :operator, -> { @function_expected = @key_expected = @value_expected = false }
+
+ # Identifiers and keywords; the token kind is chosen in the callback.
+ on %r/ [$a-zA-Z_][A-Za-z_0-9$]* /x, -> (match, encoder) do
+ kind = IDENT_KIND[match]
+ @value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
+ # TODO: labels
+ if kind == :ident
+ if match.index(?$) # $ allowed inside an identifier
+ kind = :predefined
+ elsif @function_expected
+ kind = :function
+ elsif check(/\s*[=:]\s*function\b/)
+ kind = :function
+ elsif @key_expected && check(/\s*:/)
+ kind = :key
+ end
+ end
+ encoder.text_token match, kind
+ @function_expected = (kind == :keyword) && (match == 'function')
+ @key_expected = false
+ end
+
+ # A quote opens a :key group when a key is expected and the string is
+ # followed by a colon, otherwise a :string group.
+ on %r/["']/, push { |match|
+ @string_delimiter = match
+ @key_expected && check(KEY_CHECK_PATTERN[match]) ? :key : :string
+ }, :delimiter
+
+ # A slash starts a regexp only where a value may start; otherwise it is
+ # an operator (next rule).
+ on check_if(:@value_expected), %r/\//, push(:regexp), :delimiter, -> { @string_delimiter = '/' }
+
+ on %r/ \/ /x, :operator, -> { @value_expected = true; @key_expected = false }
+ end
+
+ # Rules shared by the :string, :regexp and :key states; @string_delimiter
+ # selects the content pattern.
+ state :string, :regexp, :key do
+ on -> { STRING_CONTENT_PATTERN[@string_delimiter] }, :content
+ # on 'STRING_CONTENT_PATTERN[@string_delimiter]', :content
+
+ # on %r/\//, :delimiter, -> (match, encoder) do
+ # modifiers = scan(/[gim]+/)
+ # encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
+ # @string_delimiter = nil
+ # @key_expected = @value_expected = false
+ # end, pop
+ #
+ # on %r/["']/, :delimiter, -> do
+ # @string_delimiter = nil
+ # @key_expected = @value_expected = false
+ # end, pop
+
+ # Closing delimiter; a closing slash may be followed by regexp modifiers.
+ on %r/["'\/]/, :delimiter, -> (match, encoder) do
+ if match == '/'
+ modifiers = scan(/[gim]+/)
+ encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
+ end
+ @string_delimiter = nil
+ @key_expected = @value_expected = false
+ end, pop
+
+ # Escape sequences in strings and keys: inside single quotes only \\ and
+ # \' are emitted as :char, everything else as :content.
+ on check_if { |state| state != :regexp }, %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox, -> (match, encoder) do
+ if @string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
+ encoder.text_token match, :content
+ else
+ encoder.text_token match, :char
+ end
+ end
+
+ on check_if { |state| state == :regexp }, %r/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox, :char
+ on %r/\\./m, :content
+ # A backslash that none of the rules above matched: close the group and
+ # emit it as an error.
+ on %r/ \\ /x, pop, :error, -> do
+ @string_delimiter = nil
+ @key_expected = @value_expected = false
+ end
+ end
+
+ # state :open_multi_line_comment do
+ # on %r! .*? \*/ !mx, :initial # don't consume!
+ # on %r/ .+ /mx, :comment, -> { @value_expected = true }
+ #
+ # # if match = scan(%r! .*? \*/ !mx)
+ # # state = :initial
+ # # else
+ # # match = scan(%r! .+ !mx)
+ # # end
+ # # value_expected = true
+ # # encoder.text_token match, :comment if match
+ # end
+
+ protected
+
+ # Sets the stored scanner state to :initial.
+ def setup
+ @state = :initial
+ end
+
+    # Source of #scan_tokens: a fixed prologue and epilogue wrapped around the
+    # per-state `when` branches that the rule DSL accumulated in @@code. It is
+    # compiled with class_eval below, using the line of the `def` so that
+    # reported line numbers point into this file.
+    #
+    # Notes on the generated method:
+    # * :initial is always kept at the bottom of the state stack, so popping
+    #   a state that scanning resumed in falls back to :initial, not nil.
+    # * With :keep_state the delimiter is read from @string_delimiter; the
+    #   generated method has no local variable of that name.
+    # * A :key group left open at the end of the input is closed like a
+    #   :string or :regexp group.
+    scan_tokens_code = <<-"RUBY"
+    def scan_tokens encoder, options#{ def_line = __LINE__; nil }
+      state, @string_delimiter = options[:state] || @state
+      if @string_delimiter
+        encoder.begin_group state
+      end
+
+      @value_expected = true
+      @key_expected = false
+      @function_expected = false
+
+      states = state == :initial ? [state] : [:initial, state]
+
+      until eos?
+
+        case state
+
+#{ @@code.chomp.gsub(/^/, ' ') }
+        else
+          raise_inspect 'Unknown state: %p' % [state], encoder
+
+        end
+
+      end
+
+      if options[:keep_state]
+        @state = state, @string_delimiter
+      end
+
+      if [:string, :regexp, :key].include? state
+        encoder.end_group state
+      end
+
+      encoder
+    end
+    RUBY
+
+    # Set the PUTS environment variable to print the generated source.
+    if ENV['PUTS']
+      puts scan_tokens_code
+      puts "callbacks: #{@callbacks.size}"
+    end
+    class_eval scan_tokens_code, __FILE__, def_line
+
+ protected
+
+ def reset_instance
+ super
+ @xml_scanner.reset if defined? @xml_scanner
+ end
+
+ def xml_scanner
+ @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/java_script5.rb b/lib/coderay/scanners/java_script5.rb
new file mode 100644
index 0000000..9839d23
--- /dev/null
+++ b/lib/coderay/scanners/java_script5.rb
@@ -0,0 +1,162 @@
+# TODO: string_delimiter should be part of the state: push(:regexp, '/'), check_if -> (state, delimiter) { … }
+module CodeRay
+module Scanners
+
+ # Scanner for JavaScript.
+ #
+ # Aliases: +ecmascript+, +ecma_script+, +javascript+
+ class JavaScript5 < RuleBasedScanner
+
+ register_for :java_script5
+ file_extension 'js'
+
+ # The actual JavaScript keywords.
+ KEYWORDS = %w[
+ break case catch continue default delete do else
+ finally for function if in instanceof new
+ return switch throw try typeof var void while with
+ ] # :nodoc:
+ PREDEFINED_CONSTANTS = %w[
+ false null true undefined NaN Infinity
+ ] # :nodoc:
+
+ MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4
+
+ KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+ case delete in instanceof new return throw typeof with
+ ] # :nodoc:
+
+ # Reserved for future use.
+ RESERVED_WORDS = %w[
+ abstract boolean byte char class debugger double enum export extends
+ final float goto implements import int interface long native package
+ private protected public short static super synchronized throws transient
+ volatile
+ ] # :nodoc:
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(MAGIC_VARIABLES, :local_variable).
+ add(KEYWORDS, :keyword) # :nodoc:
+
+ ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+ REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
+ STRING_CONTENT_PATTERN = {
+ "'" => /[^\\']+/,
+ '"' => /[^\\"]+/,
+ '/' => /[^\\\/]+/,
+ } # :nodoc:
+ KEY_CHECK_PATTERN = {
+ "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
+ '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
+ } # :nodoc:
+
+ state :initial do
+ on %r/ \s+ | \\\n /x, :space, set(:value_expected) { |match, value_expected| value_expected || match.index(?\n) }
+ on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx, :comment, flag_off(:value_expected)
+ # state = :open_multi_line_comment if self[1]
+
+ on? %r/\.?\d/ do
+ on %r/0[xX][0-9A-Fa-f]+/, :hex, flag_off(:key_expected, :value_expected)
+ on %r/(?>0[0-7]+)(?![89.eEfF])/, :octal, flag_off(:key_expected, :value_expected)
+ on %r/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, flag_off(:key_expected, :value_expected)
+ on %r/\d+/, :integer, flag_off(:key_expected, :value_expected)
+ end
+
+ on check_if(:value_expected), %r/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim, -> (match, encoder) do
+ # TODO: scan over nested tags
+ xml_scanner.tokenize match, :tokens => encoder
+ end, flag_off(:value_expected)
+
+ on %r/ [-+*=<>?:;,!&^|(\[{~%]++ (?<![{,]) | \.+(?!\d) /x, :operator, flag_on(:value_expected), flag_off(:key_expected, :function_expected)
+ on %r/ [-+*=<>?:;,!&^|(\[{~%]*+ (?<=[{,]) /x, :operator, flag_on(:value_expected, :key_expected), flag_off(:function_expected)
+ on %r/ [)\]}]+ /x, :operator, flag_off(:function_expected, :key_expected, :value_expected)
+
+ on %r/ function (?![A-Za-z_0-9$]) /x, :keyword, flag_on(:function_expected), flag_off(:key_expected, :value_expected)
+ on %r/ [$a-zA-Z_][A-Za-z_0-9$]* /x, kind { |match, function_expected, key_expected|
+ kind = IDENT_KIND[match]
+ # TODO: labels
+ if kind == :ident
+ if match.index(?$) # $ allowed inside an identifier
+ kind = :predefined
+ elsif function_expected
+ kind = :function
+ elsif check(/\s*[=:]\s*function\b/)
+ kind = :function
+ elsif key_expected && check(/\s*:/)
+ kind = :key
+ end
+ end
+
+ kind
+ }, flag_off(:function_expected, :key_expected), set(:value_expected) { |match| KEYWORDS_EXPECTING_VALUE[match] }
+
+ on %r/["']/, push { |match, key_expected| key_expected && check(KEY_CHECK_PATTERN[match]) ? :key : :string }, :delimiter, set(:string_delimiter) { |match| match }
+ on check_if(:value_expected), %r/\//, push(:regexp), :delimiter
+
+ on %r/\//, :operator, flag_on(:value_expected), flag_off(:key_expected)
+ end
+
+ # Rules for the inside of a string or key; string_delimiter selects the
+ # content pattern.
+ state :string, :key do
+ on pattern { |string_delimiter| STRING_CONTENT_PATTERN[string_delimiter] }, :content
+ on %r/["']/, :delimiter, unset(:string_delimiter), flag_off(:key_expected, :value_expected), pop
+ # Inside single quotes only \\ and \' are emitted as :char.
+ on %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, kind { |match, string_delimiter|
+ string_delimiter == "'" && !(match == "\\\\" || match == "\\'") ? :content : :char
+ }
+ on %r/ \\. /mx, :content
+ # A backslash that none of the rules above matched is an error and closes
+ # the group.
+ on %r/ \\ /x, unset(:string_delimiter), flag_off(:key_expected, :value_expected), pop, :error
+ end
+
+ # Rules for the inside of a regexp literal.
+ state :regexp do
+ on STRING_CONTENT_PATTERN['/'], :content
+ # Closing slash plus optional modifiers, emitted as two separate tokens.
+ on %r/(\/)([gim]+)?/, groups(:delimiter, :modifier), flag_off(:key_expected, :value_expected), pop
+ on %r/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
+ on %r/\\./m, :content
+ on %r/ \\ /x, pop, :error, flag_off(:key_expected, :value_expected)
+ end
+
+ # state :open_multi_line_comment do
+ # on %r! .*? \*/ !mx, :initial # don't consume!
+ # on %r/ .+ /mx, :comment, -> { value_expected = true }
+ #
+ # # if match = scan(%r! .*? \*/ !mx)
+ # # state = :initial
+ # # else
+ # # match = scan(%r! .+ !mx)
+ # # end
+ # # value_expected = true
+ # # encoder.text_token match, :comment if match
+ # end
+
+ protected
+
+ # Runs the superclass setup, then initializes the scanner flags: no string
+ # delimiter, a value is expected, no key and no function name is expected.
+ def setup
+ super
+
+ @string_delimiter = nil
+ @value_expected = true
+ @key_expected = false
+ @function_expected = false
+ end
+
+ def close_groups encoder, states
+ if [:string, :key, :regexp].include? states.last
+ encoder.end_group states.last
+ end
+ end
+
+ def reset_instance
+ super
+ @xml_scanner.reset if defined? @xml_scanner
+ end
+
+ def xml_scanner
+ @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/json1.rb b/lib/coderay/scanners/json1.rb
new file mode 100644
index 0000000..d44f6ba
--- /dev/null
+++ b/lib/coderay/scanners/json1.rb
@@ -0,0 +1,100 @@
+module CodeRay
+module Scanners
+
+ # Scanner for JSON (JavaScript Object Notation).
+ class JSON1 < Scanner
+
+ register_for :json1
+ file_extension 'json'
+
+ KINDS_NOT_LOC = [
+ :float, :char, :content, :delimiter,
+ :error, :integer, :operator, :value,
+ ] # :nodoc:
+
+ ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx
+
+ protected
+
+ # Sets the stored scanner state to :initial.
+ def setup
+ @state = :initial
+ end
+
+ # See http://json.org/ for a definition of the JSON lexic/grammar.
+ def scan_tokens encoder, options
+ state = options[:state] || @state
+
+ if [:string, :key].include? state
+ encoder.begin_group state
+ end
+
+ until eos?
+
+ case state
+
+ when :initial
+ if match = scan(/ \s+ /x)
+ encoder.text_token match, :space
+ elsif match = scan(/ " (?=#{KEY}) /ox)
+ state = :key
+ encoder.begin_group :key
+ encoder.text_token match, :delimiter
+ elsif match = scan(/ " /x)
+ state = :string
+ encoder.begin_group :string
+ encoder.text_token match, :delimiter
+ elsif match = scan(/ [:,\[{\]}] /x)
+ encoder.text_token match, :operator
+ elsif match = scan(/ true | false | null /x)
+ encoder.text_token match, :value
+ elsif match = scan(/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: [eE][-+]? \d+ )? | [eE][-+]? \d+ ) /x)
+ encoder.text_token match, :float
+ elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
+ encoder.text_token match, :integer
+ else
+ encoder.text_token getch, :error
+ end
+
+ when :string, :key
+ if match = scan(/ [^\\"]+ /x)
+ encoder.text_token match, :content
+ elsif match = scan(/ " /x)
+ encoder.text_token match, :delimiter
+ encoder.end_group state
+ state = :initial
+ elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /ox)
+ encoder.text_token match, :char
+ elsif match = scan(/ \\. /mx)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ /x)
+ encoder.end_group state
+ state = :initial
+ encoder.text_token match, :error
+ else
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
+ end
+
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+
+ end
+
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+ if [:string, :key].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/json2.rb b/lib/coderay/scanners/json2.rb
new file mode 100644
index 0000000..51df782
--- /dev/null
+++ b/lib/coderay/scanners/json2.rb
@@ -0,0 +1,131 @@
+module CodeRay
+module Scanners
+
+ class JSON2RuleBasedScanner < Scanner
+ class << self
+ attr_accessor :states
+
+ def state *names, &block
+ @@states ||= {}
+
+ @@rules = []
+
+ instance_eval(&block)
+
+ for name in names
+ @@states[name] = @@rules
+ end
+
+ @@rules = nil
+ end
+
+ def token pattern, *actions
+ @@rules << [pattern, *actions]
+ end
+
+ def push_group name
+ [:begin_group, name]
+ end
+
+ def pop_group
+ [:end_group]
+ end
+ end
+ end
+
+ # Scanner for JSON (JavaScript Object Notation).
+ class JSON2 < JSON2RuleBasedScanner
+
+ register_for :json2
+ file_extension 'json'
+
+ KINDS_NOT_LOC = [
+ :float, :char, :content, :delimiter,
+ :error, :integer, :operator, :value,
+ ] # :nodoc:
+
+ ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx
+
+ state :initial do
+ token %r/ \s+ /x, :space
+
+ token %r/ " (?=#{KEY}) /x, push_group(:key), :delimiter
+ token %r/ " /x, push_group(:string), :delimiter
+
+ token %r/ [:,\[{\]}] /x, :operator
+
+ token %r/ true | false | null /x, :value
+ token %r/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: [eE][-+]? \d+ )? | [eE][-+]? \d+ ) /x, :float
+ token %r/ -? (?: 0 | [1-9]\d* ) /x, :integer
+ end
+
+ state :string, :key do
+ token %r/ [^\\"]+ /x, :content
+
+ token %r/ " /x, :delimiter, pop_group
+
+ token %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
+ token %r/ \\. /mx, :content
+ token %r/ \\ /x, pop_group, :error
+
+ # token %r/$/, end_group
+ end
+
+ protected
+
+ def setup # Default state; scan_tokens starts here unless options[:state] overrides it.
+ @state = :initial
+ end
+
+ # See http://json.org/ for a definition of the JSON lexic/grammar.
+ def scan_tokens encoder, options
+ state = options[:state] || @state
+
+ if [:string, :key].include? state
+ encoder.begin_group state
+ end
+
+ states = [state]
+
+ until eos?
+ for pattern, *actions in @@states[state]
+ if match = scan(pattern)
+ for action in actions
+ case action
+ when Symbol
+ encoder.text_token match, action
+ when Array
+ case action.first
+ when :begin_group
+ encoder.begin_group action.last
+ state = action.last
+ states << state
+ when :end_group
+ encoder.end_group states.pop
+ state = states.last
+ end
+ end
+ end
+
+ break
+ end
+ end && encoder.text_token(getch, :error)
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+ if [:string, :key].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/json3.rb b/lib/coderay/scanners/json3.rb
new file mode 100644
index 0000000..e05feb4
--- /dev/null
+++ b/lib/coderay/scanners/json3.rb
@@ -0,0 +1,143 @@
+module CodeRay
+module Scanners
+
+ class JSON3RuleBasedScanner < Scanner
+ class << self
+ attr_accessor :states
+
+ def state *names, &block
+ @@code ||= ""
+
+ @@code << "when #{names.map(&:inspect).join(', ')}\n"
+
+ @@first = true
+ instance_eval(&block)
+ @@code << " else\n"
+ # @@code << " raise 'no match for #{names.map(&:inspect).join(', ')}'\n"
+ @@code << " encoder.text_token getch, :error\n"
+ @@code << " end\n"
+ @@code << " \n"
+ end
+
+ def token pattern, *actions
+ @@code << " #{'els' unless @@first}if match = scan(#{pattern.inspect})\n"
+
+ for action in actions
+ case action
+ when Symbol
+ @@code << " p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG
+ @@code << " encoder.text_token match, #{action.inspect}\n"
+ when Array
+ case action.first
+ when :begin_group
+ @@code << " p 'begin_group %p' % [#{action.last.inspect}]\n" if $DEBUG
+ @@code << " state = #{action.last.inspect}\n"
+ @@code << " states << #{action.last.inspect}\n"
+ @@code << " encoder.begin_group #{action.last.inspect}\n"
+ when :end_group
+ @@code << " p 'end_group %p' % [states.last]\n" if $DEBUG
+ @@code << " encoder.end_group states.pop\n"
+ @@code << " state = states.last\n"
+ end
+ end
+ end
+
+ @@first = false
+ end
+
+ def push_group name
+ [:begin_group, name]
+ end
+
+ def pop_group
+ [:end_group]
+ end
+ end
+ end
+
+ # Scanner for JSON (JavaScript Object Notation).
+ class JSON3 < JSON3RuleBasedScanner
+
+ register_for :json3
+ file_extension 'json'
+
+ KINDS_NOT_LOC = [
+ :float, :char, :content, :delimiter,
+ :error, :integer, :operator, :value,
+ ] # :nodoc:
+
+ ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx
+
+ state :initial do
+ token %r/ \s+ /x, :space
+
+ token %r/ [:,\[{\]}] /x, :operator
+
+ token %r/ " (?=#{KEY}) /x, push_group(:key), :delimiter
+ token %r/ " /x, push_group(:string), :delimiter
+
+ token %r/ true | false | null /x, :value
+ token %r/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: e[-+]? \d+ )? | e[-+]? \d+ ) /ix, :float
+ token %r/ -? (?: 0 | [1-9]\d* ) (?: e[+-] \d+ )? /ix, :integer
+ end
+
+ state :key, :string do
+ token %r/ [^\\"]+ /x, :content
+
+ token %r/ " /x, :delimiter, pop_group
+
+ token %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
+ token %r/ \\. /mx, :content
+ token %r/ \\ /x, pop_group, :error
+ end
+
+ protected
+
+ def setup # Default state; the generated scan_tokens starts here unless options[:state] overrides it.
+ @state = :initial
+ end
+
+ # Source of the generated scan_tokens method: a fixed prologue and epilogue
+ # wrapped around the "when" branches the state/token DSL collected in @@code.
+ # See http://json.org/ for a definition of the JSON lexical grammar.
+ scan_tokens_code = <<-"RUBY"
+   def scan_tokens encoder, options
+     state = options[:state] || @state
+
+     if [:string, :key].include? state
+       encoder.begin_group state
+     end
+
+     # Keep :initial at the bottom of the stack so that leaving a string or key
+     # we resumed in falls back to :initial instead of an empty stack (nil state).
+     states = state == :initial ? [state] : [:initial, state]
+
+     until eos?
+
+       case state
+
+#{ @@code.chomp.gsub(/^/, ' ') }
+       else
+         raise_inspect 'Unknown state: %p' % [state], encoder
+
+       end
+
+     end
+
+     if options[:keep_state]
+       @state = state
+     end
+
+     if [:string, :key].include? state
+       encoder.end_group state
+     end
+
+     encoder
+   end
+ RUBY
+
+ # puts scan_tokens_code
+ class_eval scan_tokens_code
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/json4.rb b/lib/coderay/scanners/json4.rb
new file mode 100644
index 0000000..38d71e3
--- /dev/null
+++ b/lib/coderay/scanners/json4.rb
@@ -0,0 +1,143 @@
+module CodeRay
+module Scanners
+
+ class JSON4RuleBasedScanner < Scanner
+ class << self
+ attr_accessor :states
+
+ def state *names, &block
+ @@code ||= ""
+
+ @@code << "when #{names.map(&:inspect).join(', ')}\n"
+
+ @@first = true
+ instance_eval(&block)
+ @@code << " else\n"
+ # @@code << " raise 'no match for #{names.map(&:inspect).join(', ')}'\n"
+ @@code << " encoder.text_token getch, :error\n"
+ @@code << " end\n"
+ @@code << " \n"
+ end
+
+ def token pattern, *actions
+ @@code << " #{'els' unless @@first}if match = scan(#{pattern.inspect})\n"
+
+ for action in actions
+ case action
+ when Symbol
+ @@code << " p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG
+ @@code << " encoder.text_token match, #{action.inspect}\n"
+ when Array
+ case action.first
+ when :push
+ @@code << " p 'push %p' % [#{action.last.inspect}]\n" if $DEBUG
+ @@code << " state = #{action.last.inspect}\n"
+ @@code << " states << state\n"
+ @@code << " encoder.begin_group state\n"
+ when :pop
+ @@code << " p 'pop %p' % [states.last]\n" if $DEBUG
+ @@code << " encoder.end_group states.pop\n"
+ @@code << " state = states.last\n"
+ end
+ end
+ end
+
+ @@first = false
+ end
+
+ def push state
+ [:push, state]
+ end
+
+ def pop
+ [:pop]
+ end
+ end
+ end
+
+ # Scanner for JSON (JavaScript Object Notation).
+ class JSON4 < JSON4RuleBasedScanner
+
+ register_for :json4
+ file_extension 'json'
+
+ KINDS_NOT_LOC = [
+ :float, :char, :content, :delimiter,
+ :error, :integer, :operator, :value,
+ ] # :nodoc:
+
+ ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx
+
+ state :initial do
+ token %r/ \s+ /x, :space
+
+ token %r/ [:,\[{\]}] /x, :operator
+
+ token %r/ " (?=#{KEY}) /x, push(:key), :delimiter
+ token %r/ " /x, push(:string), :delimiter
+
+ token %r/ true | false | null /x, :value
+ token %r/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: e[-+]? \d+ )? | e[-+]? \d+ ) /ix, :float
+ token %r/ -? (?: 0 | [1-9]\d* ) (?: e[+-] \d+ )? /ix, :integer
+ end
+
+ state :key, :string do
+ token %r/ [^\\"]+ /x, :content
+
+ token %r/ " /x, :delimiter, pop
+
+ token %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
+ token %r/ \\. /mx, :content
+ token %r/ \\ /x, :error, pop
+ end
+
+ protected
+
+ def setup # Default state; the generated scan_tokens starts here unless options[:state] overrides it.
+ @state = :initial
+ end
+
+ # Source of the generated scan_tokens method: a fixed prologue and epilogue
+ # wrapped around the "when" branches the state/token DSL collected in @@code.
+ # See http://json.org/ for a definition of the JSON lexical grammar.
+ scan_tokens_code = <<-"RUBY"
+   def scan_tokens encoder, options
+     state = options[:state] || @state
+
+     if [:string, :key].include? state
+       encoder.begin_group state
+     end
+
+     # Keep :initial at the bottom of the stack so that leaving a string or key
+     # we resumed in falls back to :initial instead of an empty stack (nil state).
+     states = state == :initial ? [state] : [:initial, state]
+
+     until eos?
+
+       case state
+
+#{ @@code.chomp.gsub(/^/, ' ') }
+       else
+         raise_inspect 'Unknown state: %p' % [state], encoder
+
+       end
+
+     end
+
+     if options[:keep_state]
+       @state = state
+     end
+
+     if [:string, :key].include? state
+       encoder.end_group state
+     end
+
+     encoder
+   end
+ RUBY
+
+ # puts scan_tokens_code
+ class_eval scan_tokens_code
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/json5.rb b/lib/coderay/scanners/json5.rb
new file mode 100644
index 0000000..8b0a8bd
--- /dev/null
+++ b/lib/coderay/scanners/json5.rb
@@ -0,0 +1,53 @@
+module CodeRay
+module Scanners
+
+ # Scanner for JSON (JavaScript Object Notation).
+ #
+ # See http://json.org/ for a definition of the JSON lexical grammar.
+ class JSON5 < RuleBasedScanner
+
+ register_for :json5
+ file_extension 'json'
+
+ KINDS_NOT_LOC = [
+ :float, :char, :content, :delimiter,
+ :error, :integer, :operator, :value,
+ ] # :nodoc:
+
+ ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx
+
+ state :initial do
+ on %r/ \s+ /x, :space
+
+ on %r/ [:,\[{\]}] /x, :operator
+
+ on %r/ " (?=#{KEY}) /x, push(:key), :delimiter
+ on %r/ " /x, push(:string), :delimiter
+
+ on %r/ true | false | null /x, :value
+ on %r/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: e[-+]? \d+ )? | e[-+]? \d+ ) /ix, :float
+ on %r/ -? (?: 0 | [1-9]\d* ) (?: e[+-] \d+ )? /ix, :integer
+ end
+
+ state :key, :string do
+ on %r/ [^\\"]+ /x, :content
+
+ on %r/ " /x, :delimiter, pop
+
+ on %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
+ on %r/ \\. /mx, :content
+ on %r/ \\ /x, :error, pop
+ end
+
+ def close_groups encoder, states
+ if [:string, :key].include? states.last
+ encoder.end_group states.last
+ end
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/lua2.rb b/lib/coderay/scanners/lua2.rb
new file mode 100644
index 0000000..fa20e9b
--- /dev/null
+++ b/lib/coderay/scanners/lua2.rb
@@ -0,0 +1,157 @@
+# encoding: utf-8
+
+module CodeRay
+module Scanners
+
+ # Scanner for the Lua[http://lua.org] programming language.
+ #
+ # The language’s complete syntax is defined in
+ # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
+ # which is what this scanner tries to conform to.
+ class Lua2 < RuleBasedScanner
+
+ register_for :lua2
+ file_extension 'lua'
+ title 'Lua'
+
+ # Keywords used in Lua.
+ KEYWORDS = %w[and break do else elseif end
+ for function goto if in
+ local not or repeat return
+ then until while
+ ]
+
+ # Constants set by the Lua core.
+ PREDEFINED_CONSTANTS = %w[false true nil]
+
+ # The expressions contained in this array are parts of Lua’s `basic'
+ # library. Although it’s not entirely necessary to load that library,
+ # it is highly recommended and one would have to provide own implementations
+ # of some of these expressions if one does not do so. They however aren’t
+ # keywords, neither are they constants, but nearly predefined, so they
+ # get tagged as `predefined' rather than anything else.
+ #
+ # This list excludes values of form `_UPPERCASE' because the Lua manual
+ # requires such identifiers to be reserved by Lua anyway and they are
+ # highlighted directly accordingly, without the need for specific
+ # identifiers to be listed here.
+ PREDEFINED_EXPRESSIONS = %w[
+ assert collectgarbage dofile error getmetatable
+ ipairs load loadfile next pairs pcall print
+ rawequal rawget rawlen rawset select setmetatable
+ tonumber tostring type xpcall
+ ]
+
+ # Automatic token kind selection for normal words.
+ IDENT_KIND = CodeRay::WordList.new(:ident).
+ add(KEYWORDS, :keyword).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(PREDEFINED_EXPRESSIONS, :predefined)
+
+ # Rules for ordinary code, shared by the top level (:initial) and table
+ # constructors (:map).
+ state :initial, :map do
+   on %r/\-\-\[\=*\[/, push(:long_comment, :comment), :delimiter, #--[[ long (possibly multiline) comment ]]
+     set(:num_equals, -> (match) { match.count('=') }) # Number must match for comment end
+   on %r/--.*$/, :comment # --Lua comment
+   on %r/\[=*\[/, push(:long_string, :string), :delimiter, # [[ long (possibly multiline) string ]]
+     set(:num_equals, -> (match) { match.count('=') }) # Number must match for string end
+   # A label is any Lua name, so a single character is enough (::a::); the
+   # trailing quantifier therefore has to be * rather than +.
+   on %r/::\s*[a-zA-Z_][a-zA-Z0-9_]*\s*::/, :label # ::goto_label::
+   on %r/_[A-Z]+/, :predefined # _UPPERCASE are names reserved for Lua
+   on check_if { |brace_depth| brace_depth > 0 }, %r/([a-zA-Z_][a-zA-Z0-9_]*) (\s+)?(=)/x, groups(:key, :space, :operator)
+   on %r/[a-zA-Z_][a-zA-Z0-9_]*/, kind { |match| IDENT_KIND[match] }, push_state { |match, kind| # Normal letters (or letters followed by digits)
+     # Extra highlighting for entities following certain keywords
+     if kind == :keyword && match == 'function'
+       :function_expected
+     elsif kind == :keyword && match == 'goto'
+       :goto_label_expected
+     elsif kind == :keyword && match == 'local'
+       :local_var_expected
+     end
+   }
+
+   on %r/\{/, push(:map), kind { |brace_depth| brace_depth > 0 ? :inline_delimiter : :delimiter }, increment(:brace_depth) # Opening table brace {
+   on check_if { |brace_depth| brace_depth == 1 }, %r/\}/, :delimiter, pop, decrement(:brace_depth) # Closing table brace }
+   on check_if { |brace_depth| brace_depth == 0 }, %r/\}/, :error # Mismatched brace
+   on %r/\}/, :inline_delimiter, pop, decrement(:brace_depth)
+
+   on %r/"/, push(:double_quoted_string, :string), :delimiter # String delimiters " and '
+   on %r/'/, push(:single_quoted_string, :string), :delimiter
+   # ↓Prefix hex number ←|→ decimal number
+   on %r/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix, :float # hexadecimal constants have no E power, decimal ones no P power
+   # ↓Prefix hex number ←|→ decimal number
+   on %r/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix, :integer # hexadecimal constants have no E power, decimal ones no P power
+   on %r/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x, :operator # Operators
+   on %r/\s+/, :space # Space
+ end
+
+ state :function_expected do
+ on %r/\(.*?\)/m, :operator, pop_state # x = function() # "Anonymous" function without explicit name
+ on %r/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x, :ident # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :function, pop_state # function foo()
+ on %r/\s+/, :space # Between the `function' keyword and the ident may be any amount of whitespace
+ end
+
+ state :goto_label_expected do
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :label, pop_state
+ on %r/\s+/, :space # Between the `goto' keyword and the label may be any amount of whitespace
+ end
+
+ state :local_var_expected do
+ on %r/function/, :keyword, pop_state, push_state(:function_expected) # local function ...
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :local_variable
+ on %r/,/, :operator
+ on %r/\=/, :operator, pop_state
+ on %r/\n/, :space, pop_state
+ on %r/\s+/, :space
+ end
+
+ state :long_comment do
+ on pattern { |num_equals| %r/(.*?)(\]={#{num_equals}}\])/m }, groups(:content, :delimiter), pop(:comment)
+ on %r/.*/m, :error, pop(:comment)
+ end
+
+ state :long_string do
+ on pattern { |num_equals| %r/(.*?)(\]={#{num_equals}}\])/m }, groups(:content, :delimiter), pop(:string) # Long strings do not interpret any escape sequences
+ on %r/.*/m, :error, pop(:string)
+ end
+
+ state :single_quoted_string do
+ on %r/[^\\'\n]+/, :content # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
+ on %r/\\(?:["'abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m, :char # Escape sequences
+ on %r/'/, :delimiter, pop(:string)
+ on %r/\n/, :error, pop(:string) # Lua forbids unescaped newlines in normal non-long strings
+ # encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
+ end
+
+ state :double_quoted_string do
+ on %r/[^\\"\n]+/, :content # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
+ on %r/\\(?:["'abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m, :char # Escape sequences
+ on %r/"/, :delimiter, pop(:string)
+ on %r/\n/, :error, pop(:string) # Lua forbids unescaped newlines in normal non-long strings
+ # encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
+ end
+
+ protected
+
+ def setup # Runs the inherited setup, then resets this scanner's own counters.
+ super
+
+ @brace_depth = 0 # presumably the value behind |brace_depth| blocks and increment/decrement(:brace_depth) — confirm in RuleBasedScanner
+ @num_equals = nil # presumably the target of set(:num_equals, ...) — confirm in RuleBasedScanner
+ end
+
+ def close_groups encoder, states
+ states.reverse_each do |state|
+ case state
+ when :long_string, :single_quoted_string, :double_quoted_string
+ encoder.end_group :string
+ when :long_comment
+ encoder.end_group :long_comment
+ when :map
+ encoder.end_group :map
+ end
+ end
+ end
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/lua2b.rb b/lib/coderay/scanners/lua2b.rb
new file mode 100644
index 0000000..9e2b1fe
--- /dev/null
+++ b/lib/coderay/scanners/lua2b.rb
@@ -0,0 +1,157 @@
+# encoding: utf-8
+
+module CodeRay
+module Scanners
+
+ # Scanner for the Lua[http://lua.org] programming language.
+ #
+ # The language’s complete syntax is defined in
+ # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
+ # which is what this scanner tries to conform to.
+ class Lua2 < RuleBasedScanner
+
+ register_for :lua2
+ file_extension 'lua'
+ title 'Lua'
+
+ # Keywords used in Lua.
+ KEYWORDS = %w[and break do else elseif end
+ for function goto if in
+ local not or repeat return
+ then until while
+ ]
+
+ # Constants set by the Lua core.
+ PREDEFINED_CONSTANTS = %w[false true nil]
+
+ # The expressions contained in this array are parts of Lua’s `basic'
+ # library. Although it’s not entirely necessary to load that library,
+ # it is highly recommended and one would have to provide own implementations
+ # of some of these expressions if one does not do so. They however aren’t
+ # keywords, neither are they constants, but nearly predefined, so they
+ # get tagged as `predefined' rather than anything else.
+ #
+ # This list excludes values of form `_UPPERCASE' because the Lua manual
+ # requires such identifiers to be reserved by Lua anyway and they are
+ # highlighted directly accordingly, without the need for specific
+ # identifiers to be listed here.
+ PREDEFINED_EXPRESSIONS = %w[
+ assert collectgarbage dofile error getmetatable
+ ipairs load loadfile next pairs pcall print
+ rawequal rawget rawlen rawset select setmetatable
+ tonumber tostring type xpcall
+ ]
+
+ # Automatic token kind selection for normal words.
+ IDENT_KIND = CodeRay::WordList.new(:ident).
+ add(KEYWORDS, :keyword).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(PREDEFINED_EXPRESSIONS, :predefined)
+
+ # Rules for ordinary code, shared by the top level (:initial) and table
+ # constructors (:map).
+ state :initial, :map => :map do
+   on %r/\-\-\[\=*\[/, push(:long_comment, :comment), :delimiter, #--[[ long (possibly multiline) comment ]]
+     set(:num_equals, -> (match) { match.count('=') }) # Number must match for comment end
+   on %r/--.*$/, :comment # --Lua comment
+   on %r/\[=*\[/, push(:long_string, :string), :delimiter, # [[ long (possibly multiline) string ]]
+     set(:num_equals, -> (match) { match.count('=') }) # Number must match for string end
+   # A label is any Lua name, so a single character is enough (::a::); the
+   # trailing quantifier therefore has to be * rather than +.
+   on %r/::\s*[a-zA-Z_][a-zA-Z0-9_]*\s*::/, :label # ::goto_label::
+   on %r/_[A-Z]+/, :predefined # _UPPERCASE are names reserved for Lua
+   on check_if { |brace_depth| brace_depth > 0 }, %r/([a-zA-Z_][a-zA-Z0-9_]*) (\s+)?(=)/x, groups(:key, :space, :operator)
+   on %r/[a-zA-Z_][a-zA-Z0-9_]*/, kind { |match| IDENT_KIND[match] }, push_state { |match, kind| # Normal letters (or letters followed by digits)
+     # Extra highlighting for entities following certain keywords
+     if kind == :keyword && match == 'function'
+       :function_expected
+     elsif kind == :keyword && match == 'goto'
+       :goto_label_expected
+     elsif kind == :keyword && match == 'local'
+       :local_var_expected
+     end
+   }
+
+   on %r/\{/, push(:map), kind { |brace_depth| brace_depth > 0 ? :inline_delimiter : :delimiter }, increment(:brace_depth) # Opening table brace {
+   on check_if { |brace_depth| brace_depth == 1 }, %r/\}/, :delimiter, pop, decrement(:brace_depth) # Closing table brace }
+   on check_if { |brace_depth| brace_depth == 0 }, %r/\}/, :error # Mismatched brace
+   on %r/\}/, :inline_delimiter, pop, decrement(:brace_depth)
+
+   on %r/"/, push(:double_quoted_string, :string), :delimiter # String delimiters " and '
+   on %r/'/, push(:single_quoted_string, :string), :delimiter
+   # ↓Prefix hex number ←|→ decimal number
+   on %r/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix, :float # hexadecimal constants have no E power, decimal ones no P power
+   # ↓Prefix hex number ←|→ decimal number
+   on %r/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix, :integer # hexadecimal constants have no E power, decimal ones no P power
+   on %r/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x, :operator # Operators
+   on %r/\s+/, :space # Space
+ end
+
+ state :function_expected do
+ on %r/\(.*?\)/m, :operator, pop_state # x = function() # "Anonymous" function without explicit name
+ on %r/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x, :ident # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :function, pop_state # function foo()
+ on %r/\s+/, :space # Between the `function' keyword and the ident may be any amount of whitespace
+ end
+
+ state :goto_label_expected do
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :label, pop_state
+ on %r/\s+/, :space # Between the `goto' keyword and the label may be any amount of whitespace
+ end
+
+ state :local_var_expected do
+ on %r/function/, :keyword, pop_state, push_state(:function_expected) # local function ...
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :local_variable
+ on %r/,/, :operator
+ on %r/\=/, :operator, pop_state
+ on %r/\n/, :space, pop_state
+ on %r/\s+/, :space
+ end
+
+ state :long_comment => :comment do
+ on pattern { |num_equals| %r/(.*?)(\]={#{num_equals}}\])/m }, groups(:content, :delimiter), pop(:comment)
+ on %r/.*/m, :error, pop(:comment)
+ end
+
+ state :long_string => :string do
+ on pattern { |num_equals| %r/(.*?)(\]={#{num_equals}}\])/m }, groups(:content, :delimiter), pop(:string) # Long strings do not interpret any escape sequences
+ on %r/.*/m, :error, pop(:string)
+ end
+
+ state :single_quoted_string => :string do
+ on %r/[^\\'\n]+/, :content # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
+ on %r/\\(?:["'abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m, :char # Escape sequences
+ on %r/'/, :delimiter, pop(:string)
+ on %r/\n/, :error, pop(:string) # Lua forbids unescaped newlines in normal non-long strings
+ # encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
+ end
+
+ state :double_quoted_string => :string do
+ on %r/[^\\"\n]+/, :content # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
+ on %r/\\(?:["'abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m, :char # Escape sequences
+ on %r/"/, :delimiter, pop(:string)
+ on %r/\n/, :error, pop(:string) # Lua forbids unescaped newlines in normal non-long strings
+ # encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
+ end
+
+ protected
+
+ def setup # Runs the inherited setup, then resets this scanner's own counters.
+ super
+
+ @brace_depth = 0 # presumably the value behind |brace_depth| blocks and increment/decrement(:brace_depth) — confirm in RuleBasedScanner
+ @num_equals = nil # presumably the target of set(:num_equals, ...) — confirm in RuleBasedScanner
+ end
+
+ def close_groups encoder, states
+ states.reverse_each do |state|
+ case state
+ when :long_string, :single_quoted_string, :double_quoted_string
+ encoder.end_group :string
+ when :long_comment
+ encoder.end_group :long_comment
+ when :map
+ encoder.end_group :map
+ end
+ end
+ end
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/lua3.rb b/lib/coderay/scanners/lua3.rb
new file mode 100644
index 0000000..f0693bb
--- /dev/null
+++ b/lib/coderay/scanners/lua3.rb
@@ -0,0 +1,142 @@
+# encoding: utf-8
+# Pseudocode: states optionally define groups, comments removed, counter definition?
+
+module CodeRay
+module Scanners
+
+ # Scanner for the Lua[http://lua.org] programming language.
+ #
+ # The language’s complete syntax is defined in
+ # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
+ # which is what this scanner tries to conform to.
+ class Lua3 < RuleBasedScanner
+
+ register_for :lua3
+ file_extension 'lua'
+ title 'Lua'
+
+ # Keywords used in Lua.
+ KEYWORDS = %w[and break do else elseif end
+ for function goto if in
+ local not or repeat return
+ then until while
+ ]
+
+ # Constants set by the Lua core.
+ PREDEFINED_CONSTANTS = %w[false true nil]
+
+ # The expressions contained in this array are parts of Lua’s `basic'
+ # library. Although it’s not entirely necessary to load that library,
+ # it is highly recommended and one would have to provide own implementations
+ # of some of these expressions if one does not do so. They however aren’t
+ # keywords, neither are they constants, but nearly predefined, so they
+ # get tagged as `predefined' rather than anything else.
+ #
+ # This list excludes values of form `_UPPERCASE' because the Lua manual
+ # requires such identifiers to be reserved by Lua anyway and they are
+ # highlighted directly accordingly, without the need for specific
+ # identifiers to be listed here.
+ PREDEFINED_EXPRESSIONS = %w[
+ assert collectgarbage dofile error getmetatable
+ ipairs load loadfile next pairs pcall print
+ rawequal rawget rawlen rawset select setmetatable
+ tonumber tostring type xpcall
+ ]
+
+ # Automatic token kind selection for normal words.
+ IDENT_KIND = CodeRay::WordList.new(:ident).
+ add(KEYWORDS, :keyword).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(PREDEFINED_EXPRESSIONS, :predefined)
+
+ protected
+
+ # Scanner initialization.
+ def setup # Runs the inherited setup, then resets this scanner's own counters.
+ super
+ @brace_depth = 0 # presumably the value behind the :brace_depth counter and its checks — confirm in RuleBasedScanner
+ @num_equals = nil # presumably the target of set(:num_equals, ...) — confirm in RuleBasedScanner
+ end
+
+ counter :brace_depth
+
+ # Rules for ordinary code, shared by the top level (:initial) and table
+ # constructors (:map).
+ state :initial, :map => :map do
+   on %r/\-\-\[\=*\[/, push(:long_comment), :delimiter, set(:num_equals, -> (match) { match.count('=') })
+   on %r/--.*$/, :comment
+   on %r/\[=*\[/, push(:long_string), :delimiter, set(:num_equals, -> (match) { match.count('=') })
+   # A label is any Lua name, so a single character is enough (::a::); the
+   # trailing quantifier therefore has to be * rather than +.
+   on %r/::\s*[a-zA-Z_][a-zA-Z0-9_]*\s*::/, :label
+   on %r/_[A-Z]+/, :predefined
+   on check_if(:brace_depth, :>, 0), %r/([a-zA-Z_][a-zA-Z0-9_]*) (\s+)?(=)/x, groups(:key, :space, :operator)
+   on %r/[a-zA-Z_][a-zA-Z0-9_]*/, kind { |match| IDENT_KIND[match] }, push_state { |match, kind|
+     if kind == :keyword && match == 'function'
+       :function_expected
+     elsif kind == :keyword && match == 'goto'
+       :goto_label_expected
+     elsif kind == :keyword && match == 'local'
+       :local_var_expected
+     end
+   }
+
+   on %r/\{/, push(:map), kind { |brace_depth| brace_depth > 0 ? :inline_delimiter : :delimiter }, increment(:brace_depth)
+   on check_if(:brace_depth, :==, 1), %r/\}/, :delimiter, pop, decrement(:brace_depth)
+   on check_if(:brace_depth, :==, 0), %r/\}/, :error
+   on %r/\}/, :inline_delimiter, pop, decrement(:brace_depth)
+
+   on %r/"/, push(:double_quoted_string), :delimiter
+   on %r/'/, push(:single_quoted_string), :delimiter
+
+   on %r/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix, :float
+
+   on %r/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix, :integer
+   on %r/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x, :operator
+   on %r/\s+/, :space
+ end
+
+ state :function_expected do
+ on %r/\(.*?\)/m, :operator, pop
+ on %r/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x, :ident
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :function, pop
+ on %r/\s+/, :space
+ end
+
+ state :goto_label_expected do
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :label, pop
+ on %r/\s+/, :space
+ end
+
+ state :local_var_expected do
+ on %r/function/, :keyword, pop, push(:function_expected)
+ on %r/[a-zA-Z_][a-zA-Z0-9_]*/, :local_variable
+ on %r/,/, :operator
+ on %r/\=/, :operator, pop
+ on %r/\n/, :space, pop
+ on %r/\s+/, :space
+ end
+
+ state :long_comment => :comment do
+ on pattern { |num_equals| %r/(.*?)(\]={#{num_equals}}\])/m }, groups(:content, :delimiter), pop(:comment)
+ on %r/.*/m, :error, pop(:comment)
+ end
+
+ state :long_string => :string do
+ on pattern { |num_equals| %r/(.*?)(\]={#{num_equals}}\])/m }, groups(:content, :delimiter), pop(:string)
+ on %r/.*/m, :error, pop(:string)
+ end
+
+ state :single_quoted_string => :string do
+ on %r/[^\\'\n]+/, :content
+ on %r/\\(?:["'abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m, :char
+ on %r/'/, :delimiter, pop(:string)
+ on %r/\n/, :error, pop(:string)
+ end
+
+ state :double_quoted_string => :string do
+ on %r/[^\\"\n]+/, :content
+ on %r/\\(?:["'abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m, :char
+ on %r/"/, :delimiter, pop(:string)
+ on %r/\n/, :error, pop(:string)
+ end
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/lua4.rb b/lib/coderay/scanners/lua4.rb
new file mode 100644
index 0000000..0315d34
--- /dev/null
+++ b/lib/coderay/scanners/lua4.rb
@@ -0,0 +1,89 @@
+# encoding: utf-8
+
+module CodeRay
+module Scanners
+
+ # Scanner for the Lua[http://lua.org] programming language.
+ #
+ # The language’s complete syntax is defined in
+ # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
+ # which is what this scanner tries to conform to.
+ class Lua4 < RuleBasedScanner
+
+ register_for :lua4
+ file_extension 'lua'
+ title 'Lua'
+
+ protected
+
+ state :initial do
+ on %r'#!(.*?)$', :doctype
+ on %r//, push_state(:base)
+ end
+
+ state :base do
+ on %r'--\[(=*)\[.*?\]\1\]'m, :comment
+ on %r'--.*$', :comment
+
+ on %r'(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?'i, :float
+ on %r'\d+e[+-]?\d+'i, :float
+ on %r'0x[0-9a-f]*'i, :hex
+ on %r'\d+', :integer
+
+ on %r'\n', :space
+ on %r'[^\S\n]', :space
+ # multiline strings
+ on %r'\[(=*)\[.*?\]\1\]'m, :string
+
+ on %r'(==|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#!.\\:])', :operator
+ on %r'[\[\]{}().,:;]', :operator
+ on %r'(and|or|not)\b', :operator
+
+ on %r'(break|do|else|elseif|end|for|if|in|repeat|return|then|until|while)\b', :keyword
+ on %r'(local)\b', :keyword
+ on %r'(true|false|nil)\b', :predefined_constant
+
+ on %r'(function)\b', :keyword, push_state(:funcname)
+
+ on %r'[A-Za-z_]\w*(\.[A-Za-z_]\w*)?', :ident
+
+ # on %r"'", :string, combined(:stringescape, :sqs)
+ on %r"'", :string, push_state(:sqs)
+ # on %r'"', :string, combined(:stringescape, :dqs)
+ on %r'"', :string, push_state(:dqs)
+ end
+
+ state :funcname do
+ on %r'\s+', :space
+ on %r'(?:([A-Za-z_]\w*)(\.))?([A-Za-z_]\w*)', groups(:class, :operator, :function), pop_state
+ # inline function
+ on %r'\(', :operator, pop_state
+ end
+
+ # if I understand correctly, every character is valid in a lua string,
+ # so this state is only for later corrections
+ # state :string do
+ # on %r'.', :string
+ # end
+
+ # state :stringescape do
+ # on %r/\\([abfnrtv\\"']|\d{1,3})/, :escape
+ # end
+
+ state :sqs do
+ on %r"'", :string, pop_state
+ # include(:string)
+ on %r/\\([abfnrtv\\"']|\d{1,3})/, :escape
+ on %r'.', :string
+ end
+
+ state :dqs do
+ on %r'"', :string, pop_state
+ # include(:string)
+ on %r/\\([abfnrtv\\"']|\d{1,3})/, :escape
+ on %r'.', :string
+ end
+ end
+
+end
+end
diff --git a/lib/coderay/simple_scanner.rb b/lib/coderay/simple_scanner.rb
new file mode 100644
index 0000000..6873f88
--- /dev/null
+++ b/lib/coderay/simple_scanner.rb
@@ -0,0 +1,40 @@
+require 'set'
+
+module CodeRay
+ module Scanners
+ class SimpleScanner < Scanner
+ extend SimpleScannerDSL
+
+ class << self
+ def define_scan_tokens! # Compiles the generated scanner source (scan_tokens_code) into an instance method scan_tokens(encoder, options) on this class.
+ if ENV['PUTS'] # Debugging aid: print the generated source, highlighted for the terminal, and the number of callbacks.
+ puts CodeRay.scan(scan_tokens_code, :ruby).terminal
+ puts "callbacks: #{callbacks.size}"
+ end
+
+ class_eval <<-RUBY
+def scan_tokens encoder, options
+#{ scan_tokens_code.chomp.gsub(/^/, ' ' * 2) }
+end
+ RUBY
+ end
+ end
+
+ def scan_tokens tokens, options # Bootstrap stub: on first use it defines the generated scan_tokens on the receiver's class, then calls it.
+ self.class.define_scan_tokens! # class_eval defines scan_tokens on self.class, which shadows or replaces this stub
+
+ scan_tokens tokens, options # dispatches to the freshly defined method, not back into this stub
+ end
+
+ protected
+
+ def setup
+ @state = :initial
+ end
+
+ def close_groups encoder, states
+ # TODO
+ end
+ end
+ end
+end \ No newline at end of file
diff --git a/lib/coderay/simple_scanner_dsl.rb b/lib/coderay/simple_scanner_dsl.rb
new file mode 100644
index 0000000..b3c8c57
--- /dev/null
+++ b/lib/coderay/simple_scanner_dsl.rb
@@ -0,0 +1,381 @@
+require 'set'
+
+module CodeRay
+ module Scanners
+ module SimpleScannerDSL
+ Pattern = Struct.new :pattern
+ Groups = Struct.new :token_kinds
+ Kind = Struct.new :token_kind
+ Push = Struct.new :state, :group
+ Pop = Struct.new :group
+ PushState = Struct.new :state
+ PopState = Class.new
+ Check = Struct.new :condition
+ CheckIf = Class.new Check
+ CheckUnless = Class.new Check
+ ValueSetter = Struct.new :targets, :value
+ Increment = Struct.new :targets, :operation, :value
+ Continue = Class.new
+
+ State = Struct.new :names, :block, :dsl do
+ def initialize(*)
+ super
+ eval
+ end
+
+ def eval
+ @first = true
+
+ @code = ""
+ instance_eval(&block)
+ end
+
+ def code
+ <<-RUBY
+when #{names.map(&:inspect).join(', ')}
+#{ rules_code.chomp.gsub(/^/, ' ') }
+ else
+#{ handle_unexpected_char_code.chomp.gsub(/^/, ' ' * 2) }
+ end
+ RUBY
+ end
+
+ protected
+
+ def rules_code
+ @code
+ end
+
+ def handle_unexpected_char_code
+ ''.tap do |code|
+ code << 'puts "no match for #{state.inspect} => skip char"' << "\n" if $DEBUG
+ code << 'encoder.text_token getch, :error'
+ end
+ end
+
+ public
+
+ def on? pattern
+ pattern_expression = pattern.inspect
+ @code << "#{'els' unless @first}if check(#{pattern_expression})\n"
+
+ @first = true
+ yield
+ @code << "end\n"
+
+ @first = false
+ end
+
+ # Compiles one rule into generated scanner code and appends it to @code.
+ #
+ # The arguments are the rule's leading conditions (Regexp, Pattern, CheckIf,
+ # CheckUnless), which are joined with && into a single if/elsif test,
+ # followed by one or more actions, which are emitted in order as its body.
+ # Raises RuntimeError for a malformed rule or an unsupported condition/action.
+ def on *pattern_and_actions
+   index = pattern_and_actions.find_index { |item| !(item.is_a?(Check) || item.is_a?(Regexp) || item.is_a?(Pattern)) }
+   raise "invalid rule structure: #{pattern_and_actions.map(&:class)}" unless index
+   # Slice by count: [0..index - 1] would be [0..-1], i.e. everything, when index is 0.
+   raise 'I need conditions or a pattern!' if index.zero?
+   conditions = pattern_and_actions.first(index)
+   actions = pattern_and_actions.drop(index)
+
+   condition_expressions = conditions.map do |condition|
+     case condition
+     when CheckIf
+       case condition.condition
+       when Proc
+         "#{dsl.add_callback(condition.condition)}"
+       when Symbol
+         "#{condition.condition}"
+       else
+         raise "I don't know how to evaluate this check_if condition: %p" % [condition.condition]
+       end
+     when CheckUnless
+       case condition.condition
+       when Proc
+         "!#{dsl.add_callback(condition.condition)}"
+       when Symbol
+         "!#{condition.condition}"
+       else
+         raise "I don't know how to evaluate this check_unless condition: %p" % [condition.condition]
+       end
+     when Pattern
+       case condition.pattern
+       when Proc
+         "match = scan(#{dsl.add_callback(condition.pattern)})"
+       else
+         raise "I don't know how to evaluate this pattern: %p" % [condition.pattern]
+       end
+     when Regexp
+       "match = scan(#{condition.inspect})"
+     else
+       raise "I don't know how to evaluate this pattern/condition: %p" % [condition]
+     end
+   end
+
+   @code << "#{'els' unless @first}if #{condition_expressions.join(' && ')}\n"
+
+   actions.each do |action|
+     case action
+     when String
+       # Raw code strings are disabled; say so instead of a bare "unhandled exception".
+       raise "String actions are not supported: %p" % [action]
+
+     when Symbol
+       @code << "p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG
+       @code << "encoder.text_token match, #{action.inspect}\n"
+     when Kind
+       case action.token_kind
+       when Proc
+         @code << "encoder.text_token match, kind = #{dsl.add_callback(action.token_kind)}\n"
+       else
+         raise "I don't know how to evaluate this kind: %p" % [action.token_kind]
+       end
+     when Groups
+       @code << "p 'text_tokens %p in groups %p' % [match, #{action.token_kinds.inspect}]\n" if $DEBUG
+       action.token_kinds.each_with_index do |kind, i|
+         @code << "encoder.text_token self[#{i + 1}], #{kind.inspect} if self[#{i + 1}]\n"
+       end
+
+     when Push, PushState
+       case action.state
+       when String
+         raise "String states are not supported: %p" % [action.state]
+       when Symbol
+         @code << "p 'push %p' % [#{action.state.inspect}]\n" if $DEBUG
+         @code << "state = #{action.state.inspect}\n"
+         @code << "states << state\n"
+       when Proc
+         @code << "if new_state = #{dsl.add_callback(action.state)}\n"
+         @code << " state = new_state\n"
+         @code << " states << new_state\n"
+         @code << "end\n"
+       else
+         raise "I don't know how to evaluate this push state: %p" % [action.state]
+       end
+       if action.is_a? Push
+         if action.state == action.group
+           @code << "encoder.begin_group state\n"
+         else
+           # Dispatch on the group, not the state: the two can be of different kinds.
+           case action.group
+           when Symbol
+             @code << "p 'begin group %p' % [#{action.group.inspect}]\n" if $DEBUG
+             @code << "encoder.begin_group #{action.group.inspect}\n"
+           when Proc
+             @code << "encoder.begin_group #{dsl.add_callback(action.group)}\n"
+           else
+             raise "I don't know how to evaluate this push group: %p" % [action.group]
+           end
+         end
+       end
+     when Pop, PopState
+       @code << "p 'pop %p' % [states.last]\n" if $DEBUG
+       if action.is_a? Pop
+         if action.group
+           case action.group
+           when Symbol
+             @code << "encoder.end_group #{action.group.inspect}\n"
+           else
+             raise "I don't know how to evaluate this pop group: %p" % [action.group]
+           end
+           @code << "states.pop\n"
+         else
+           @code << "encoder.end_group states.pop\n"
+         end
+       else
+         @code << "states.pop\n"
+       end
+       @code << "state = states.last\n"
+
+     when ValueSetter
+       case action.value
+       when Proc
+         @code << "#{action.targets.join(' = ')} = #{dsl.add_callback(action.value)}\n"
+       when Symbol
+         @code << "#{action.targets.join(' = ')} = #{action.value}\n"
+       else
+         @code << "#{action.targets.join(' = ')} = #{action.value.inspect}\n"
+       end
+
+     when Increment
+       # One statement per counter: a chained "a = b += 1" would assign b + 1 to a.
+       operand =
+         case action.value
+         when Proc then dsl.add_callback(action.value)
+         when Symbol then action.value
+         else action.value.inspect
+         end
+       action.targets.each { |target| @code << "#{target} #{action.operation}= #{operand}\n" }
+
+     when Proc
+       @code << "#{dsl.add_callback(action)}\n"
+
+     when Continue
+       @code << "next\n"
+
+     else
+       raise "I don't know how to evaluate this action: %p" % [action]
+     end
+   end
+
+   @first = false
+ end
+
+ def groups *token_kinds
+ Groups.new token_kinds
+ end
+
+ def pattern pattern = nil, &block
+ Pattern.new pattern || block
+ end
+
+ def kind token_kind = nil, &block
+ Kind.new token_kind || block
+ end
+
+ def push state = nil, group = state, &block
+ raise 'push requires a state or a block; got nothing' unless state || block
+ Push.new state || block, group || block
+ end
+
+ def pop group = nil
+ Pop.new group
+ end
+
+ def push_state state = nil, &block
+ raise 'push_state requires a state or a block; got nothing' unless state || block
+ PushState.new state || block
+ end
+
+ def pop_state
+ PopState.new
+ end
+
+ def check_if value = nil, &callback
+ CheckIf.new value || callback
+ end
+
+ def check_unless value = nil, &callback
+ CheckUnless.new value || callback
+ end
+
+ def flag_on *flags
+ flags.each { |name| dsl.add_variable name }
+ ValueSetter.new Array(flags), true
+ end
+
+ def flag_off *flags
+ flags.each { |name| dsl.add_variable name }
+ ValueSetter.new Array(flags), false
+ end
+
+ def set flag, value = nil, &callback # registers flag as a scanner variable and builds the action that assigns it
+   dsl.add_variable flag
+   ValueSetter.new([flag], value.nil? ? callback : value) # only fall back to the block when no value was given; keep an explicit false
+ end
+
+ def unset *flags
+ flags.each { |name| dsl.add_variable name }
+ ValueSetter.new Array(flags), nil
+ end
+
+ def increment *counters
+ counters.each { |name| dsl.add_variable name }
+ Increment.new Array(counters), :+, 1
+ end
+
+ def decrement *counters
+ counters.each { |name| dsl.add_variable name }
+ Increment.new Array(counters), :-, 1
+ end
+
+ def continue
+ Continue.new
+ end
+ end
+
+ attr_accessor :states
+
+ def state *names, &block
+ @states ||= []
+ @states << State.new(names, block, self)
+ end
+
+ def add_callback block
+ base_name = "__callback_line_#{block.source_location.last}"
+ callback_name = base_name
+ counter = 'a'
+ while callbacks.key?(callback_name)
+ callback_name = "#{base_name}_#{counter}"
+ counter.succ!
+ end
+
+ callbacks[callback_name] = define_method(callback_name, &block)
+
+ parameters = block.parameters
+
+ if parameters.empty?
+ callback_name
+ else
+ parameter_names = parameters.map(&:last)
+ parameter_names.each { |name| variables << name }
+ "#{callback_name}(#{parameter_names.join(', ')})"
+ end
+ end
+
+ def add_variable name
+ variables << name
+ end
+
+ protected
+
+ def callbacks
+ @callbacks ||= {}
+ end
+
+ def variables
+ @variables ||= Set.new
+ end
+
+ def additional_variables
+ variables - %i(encoder options state states match kind)
+ end
+
+ def scan_tokens_code
+ <<-"RUBY"
+state = options[:state] || @state
+states = [state]
+#{ restore_local_variables_code.chomp }
+
+until eos?
+ case state
+#{ states_code.chomp.gsub(/^/, ' ') }
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ end
+end
+
+@state = state if options[:keep_state]
+
+#{ close_groups_code.chomp }
+
+encoder
+ RUBY
+ end
+
+ def restore_local_variables_code
+ additional_variables.sort.map { |name| "#{name} = @#{name}" }.join("\n")
+ end
+
+ def states_code # concatenated `when` clauses of every state declared via #state
+   Array(@states).map(&:code).join # @states is nil until the first #state call; emit all states, not just the first
+ end
+
+ def close_groups_code
+ 'close_groups(encoder, states)'
+ end
+ end
+ end
+end \ No newline at end of file
diff --git a/lib/coderay/state_based_scanner.rb b/lib/coderay/state_based_scanner.rb
new file mode 100644
index 0000000..b196adc
--- /dev/null
+++ b/lib/coderay/state_based_scanner.rb
@@ -0,0 +1,394 @@
+require 'set'
+
+module CodeRay
+ module Scanners
+ class StateBasedScanner < Scanner
+ class State
+ attr_reader :names
+ attr_reader :rules
+ attr_reader :scanner
+
+ def initialize scanner, names, &block
+ @scanner = scanner
+ @names = names
+
+ @rules = []
+ @check = nil
+
+ instance_eval(&block)
+ end
+
+ def rules_code
+ <<-RUBY
+when #{names.map(&:inspect).join(', ')}
+#{rules.map.with_index { |rule, index| rule.code(first: index.zero?) }.join}
+ else
+ puts "no match for \#{state.inspect} => skip character" if $DEBUG
+ encoder.text_token getch, :error
+ end
+
+ RUBY
+ end
+
+ protected
+
+ # structure
+ def check *conditions, &block
+ return @check unless conditions.any? || block
+ raise "Can't nest check yet" if @check
+
+ @check = Conditions.new(conditions)
+ instance_eval(&block)
+ @check = nil
+ end
+
+ # rules
+ def on pattern, *actions, &block
+ @rules << Rule.new(self, pattern, *actions, check: @check, &block)
+ end
+
+ def skip pattern, *actions, &block
+ @rules << Rule.new(self, pattern, *actions, check: @check, skip: true, &block)
+ end
+
+ def otherwise *actions, &block
+ @rules << Rule.new(self, //, *actions, check: @check, skip: true, &block)
+ end
+
+ # actions
+ def push state
+ Push.new(state)
+ end
+
+ def pop
+ Pop.new
+ end
+
+ def kind token_kind = nil, &block
+ Kind.new token_kind || scanner.callback(block)
+ end
+
+ def groups *token_kinds
+ Groups.new(token_kinds)
+ end
+
+ def set target, value = nil, &block # builds a Setter action; with neither value nor block the value defaults to true
+   Setter.new(target, value.nil? ? (block || true) : value) # an explicit false must stay false, not fall through to true
+ end
+
+ def callback block
+ scanner.callback(block)
+ end
+
+ # magic flag getters
+ def method_missing method, *args, &block
+ method_name = method.to_s
+ if method_name.end_with?('?')
+ Getter.new(scanner.variable(method_name.chomp('?')))
+ else
+ super
+ end
+ end
+ end
+
+ class GroupState < State
+ end
+
+ class Rule
+ attr_reader :pattern
+ attr_reader :actions
+ attr_reader :check
+ attr_reader :state
+
+ def initialize state, pattern, *actions, check:, skip: false, &block
+ @state = state
+ @pattern = (skip ? Skip : Scan).new(pattern)
+ @actions = *build_actions(actions, block)
+ @check = check
+
+ raise [pattern, *actions, check, skip, block].inspect if check == false
+ end
+
+ def code first:
+ <<-RUBI
+ #{'els' unless first}if #{condition_expression}
+#{actions_code.gsub(/^/, ' ' * 2)}
+ RUBI
+ end
+
+ def skip?
+ @pattern.is_a?(Skip)
+ end
+
+ protected
+
+ def condition_expression
+ [check, pattern].compact.map(&:code).join(' && ')
+ end
+
+ def actions_code
+ actions.map(&:code).join("\n")
+ end
+
+ def build_actions actions, block
+ actions += [block] if block
+
+ actions.map do |action|
+ case action
+ when Symbol
+ Token.new(action)
+ when Proc
+ state.instance_eval do
+ callback action
+ end
+ when WordList
+ state.instance_eval do
+ kind { |match| action[match] }
+ end
+ when Push, Pop, Groups, Kind, Setter
+ action
+ else
+ raise "Don't know how to build action for %p (%p)" % [action, action.class]
+ end
+ end
+ end
+ end
+
+ # conditions
+ class Conditions < Struct.new(:conditions)
+ def code
+ "#{conditions.map(&:code).join(' && ')}"
+ end
+ end
+
+ class Scan < Struct.new(:pattern)
+ def code
+ "match = scan(#{pattern.inspect})"
+ end
+ end
+
+ class Skip < Scan
+ end
+
+ class Getter < Struct.new(:name, :negative) # rule condition that reads the variable +name+, optionally negated
+   def code
+     "#{'!' if self[:negative]}#{name}" # read the struct member directly; #negative below always returns a (truthy) Getter
+   end
+
+   def !@ # lets the DSL write `!flag?`; returns the Getter with opposite polarity
+     negative
+   end
+
+   protected
+
+   def negative # memoized counterpart, so negating twice yields a positive Getter again
+     @negative ||= Getter.new(name, (:negative unless self[:negative]))
+   end
+ end
+
+ # actions
+ class Push < Struct.new :state
+ def code
+ "push"
+ end
+ end
+
+ class Pop < Class.new
+ def code
+ "pop"
+ end
+ end
+
+ class Groups < Struct.new(:token_kinds)
+ def code
+ "groups"
+ end
+ end
+
+ class Setter < Struct.new(:name, :value)
+ def code
+ "set"
+ end
+ end
+
+
+ class Kind < Struct.new(:token_kind)
+ def code
+ case token_kind
+ when Callback
+ "encoder.text_token match, kind = #{token_kind.code}\n"
+ else
+ raise "I don't know how to evaluate this kind: %p" % [token_kind]
+ end
+ end
+ end
+
+ class Token < Struct.new(:name)
+ def code
+ "encoder.text_token match, #{name.inspect}"
+ end
+ end
+
+ class Callback < Struct.new(:name, :block)
+ def code
+ if parameter_names.empty?
+ name
+ else
+ "#{name}(#{parameter_names.join(', ')})"
+ end
+ end
+
+ protected
+
+ def parameter_names
+ block.parameters.map(&:last)
+ end
+ end
+
+ class << self
+ def states
+ @states ||= {}
+ end
+
+ def scan_tokens tokens, options # NOTE(review): singleton-level twin of the instance method #scan_tokens defined further down — confirm it is needed
+ self.class.define_scan_tokens! # NOTE(review): inside `class << self`, self is the scanner class, so self.class is presumably Class, which does not define this — verify
+ # NOTE(review): if the line above is changed to call define_scan_tokens! directly, the call below would re-enter this same method — confirm intent
+ scan_tokens tokens, options
+ end
+
+ def define_scan_tokens!
+ if ENV['PUTS']
+ puts CodeRay.scan(scan_tokens_code, :ruby).terminal
+ puts "callbacks: #{callbacks.size}"
+ end
+
+ class_eval scan_tokens_code
+ end
+
+ def variable name
+ variables << name.to_sym
+
+ name
+ end
+
+ def callback block
+ return unless block
+
+ callback_name = name_for_callback(block)
+ callbacks[callback_name] = define_method(callback_name, &block)
+ block.parameters.map(&:last).each { |name| variable name }
+
+ Callback.new(callback_name, block)
+ end
+
+ protected
+
+ def state *names, state_class: State, &block
+ state_class.new(self, names, &block).tap do |state|
+ for name in names
+ states[name] = state
+ end
+ end
+ end
+
+ def group_state *names, &block
+ state(*names, state_class: GroupState, &block)
+ end
+
+ def callbacks
+ @callbacks ||= {}
+ end
+
+ def variables
+ @variables ||= Set.new
+ end
+
+ def additional_variables
+ variables - %i(encoder options state states match kind)
+ end
+
+ def name_for_callback block
+ base_name = "__callback_line_#{block.source_location.last}"
+ callback_name = base_name
+ counter = 'a'
+
+ while callbacks.key?(callback_name)
+ callback_name = "#{base_name}_#{counter}"
+ counter.succ!
+ end
+
+ callback_name
+ end
+
+ def scan_tokens_code
+ <<-"RUBY"
+ def scan_tokens encoder, options
+ state = options[:state] || @state
+
+#{ restore_local_variables_code.chomp.gsub(/^/, ' ' * 3) }
+
+ states = [state]
+
+ until eos?
+ case state
+#{ states_code.chomp.gsub(/^/, ' ' * 4) }
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ end
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+#{ close_groups_code.chomp.gsub(/^/, ' ' * 3) }
+
+ encoder
+ end
+ RUBY
+ end
+
+ def states_code
+ states.values.map(&:rules_code).join
+ end
+
+ def restore_local_variables_code
+ additional_variables.sort.map { |name| "#{name} = @#{name}" }.join("\n")
+ end
+
+ def close_groups_code
+ "close_groups(encoder, states)"
+ end
+ end
+
+ def scan_tokens tokens, options # first-call stub: builds the real scanner method, then hands over to it
+ self.class.define_scan_tokens! # class_evals the generated "def scan_tokens encoder, options" into the scanner class
+ # Call again by name so that the freshly generated definition does the actual scanning.
+ scan_tokens tokens, options
+ end
+
+ protected
+
+ def setup
+ @state = :initial
+ reset_expectations
+ end
+
+ def close_groups encoder, states
+ # TODO
+ end
+
+ def expect kind
+ @expected = kind
+ end
+
+ def expected? kind
+ @expected == kind
+ end
+
+ def reset_expectations
+ @expected = nil
+ end
+ end
+ end
+end
diff --git a/lib/coderay/version.rb b/lib/coderay/version.rb
index f5e7a39..ed87d63 100644
--- a/lib/coderay/version.rb
+++ b/lib/coderay/version.rb
@@ -1,3 +1,3 @@
module CodeRay
- VERSION = '1.1.2'
+ VERSION = '2.0.0'
end
diff --git a/rake_tasks/test.rake b/rake_tasks/test.rake
index ce32a02..58e6daa 100644
--- a/rake_tasks/test.rake
+++ b/rake_tasks/test.rake
@@ -48,6 +48,11 @@ Please rename or remove it and run again to use the GitHub repository:
task lang => :update_scanner_suite do
ruby "./test/scanners/suite.rb #{lang}"
end
+ (1..5).each do |i|
+ task "#{lang}:#{i}" => :update_scanner_suite do
+ ruby "./test/scanners/suite.rb #{lang}:#{i}"
+ end
+ end
end
end
diff --git a/spec/simple_scanner_spec.rb b/spec/simple_scanner_spec.rb
new file mode 100644
index 0000000..088343c
--- /dev/null
+++ b/spec/simple_scanner_spec.rb
@@ -0,0 +1,28 @@
+RSpec.describe CodeRay::Scanners::SimpleScanner do
+ let(:scanner) { Class.new described_class }
+
+ describe '#scan_tokens_code' do
+ subject { scanner.send :scan_tokens_code }
+ it 'lets you define states' do
+ is_expected.to eq <<-RUBY
+state = options[:state] || @state
+states = [state]
+
+
+until eos?
+ case state
+
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ end
+end
+
+@state = state if options[:keep_state]
+
+close_groups(encoder, states)
+
+encoder
+ RUBY
+ end
+ end
+end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
new file mode 100644
index 0000000..49b6a0e
--- /dev/null
+++ b/spec/spec_helper.rb
@@ -0,0 +1,96 @@
+# This file was generated by the `rspec --init` command. Conventionally, all
+# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
+# The generated `.rspec` file contains `--require spec_helper` which will cause
+# this file to always be loaded, without a need to explicitly require it in any
+# files.
+#
+# Given that it is always loaded, you are encouraged to keep this file as
+# light-weight as possible. Requiring heavyweight dependencies from this file
+# will add to the boot time of your test suite on EVERY test run, even for an
+# individual file that may not need all of that loaded. Instead, consider making
+# a separate helper file that requires the additional dependencies and performs
+# the additional setup, and require it from the spec files that actually need
+# it.
+#
+# The `.rspec` file also contains a few flags that are not defaults but that
+# users commonly want.
+#
+# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
+RSpec.configure do |config|
+ # rspec-expectations config goes here. You can use an alternate
+ # assertion/expectation library such as wrong or the stdlib/minitest
+ # assertions if you prefer.
+ config.expect_with :rspec do |expectations|
+ # This option will default to `true` in RSpec 4. It makes the `description`
+ # and `failure_message` of custom matchers include text for helper methods
+ # defined using `chain`, e.g.:
+ # be_bigger_than(2).and_smaller_than(4).description
+ # # => "be bigger than 2 and smaller than 4"
+ # ...rather than:
+ # # => "be bigger than 2"
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
+ end
+
+ # rspec-mocks config goes here. You can use an alternate test double
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
+ config.mock_with :rspec do |mocks|
+ # Prevents you from mocking or stubbing a method that does not exist on
+ # a real object. This is generally recommended, and will default to
+ # `true` in RSpec 4.
+ mocks.verify_partial_doubles = true
+ end
+
+ # These two settings work together to allow you to limit a spec run
+ # to individual examples or groups you care about by tagging them with
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
+ # get run.
+ config.filter_run :focus
+ config.run_all_when_everything_filtered = true
+
+ # Allows RSpec to persist some state between runs in order to support
+ # the `--only-failures` and `--next-failure` CLI options. We recommend
+ # you configure your source control system to ignore this file.
+ config.example_status_persistence_file_path = "spec/examples.txt"
+
+ # Limits the available syntax to the non-monkey patched syntax that is
+ # recommended. For more details, see:
+ # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
+ # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
+ # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
+ config.disable_monkey_patching!
+
+ # This setting enables warnings. It's recommended, but in some cases may
+ # be too noisy due to issues in dependencies.
+ config.warnings = true
+
+ # Many RSpec users commonly either run the entire suite or an individual
+ # file, and it's useful to allow more verbose output when running an
+ # individual spec file.
+ if config.files_to_run.one?
+ # Use the documentation formatter for detailed output,
+ # unless a formatter has already been configured
+ # (e.g. via a command-line flag).
+ config.default_formatter = 'doc'
+ end
+
+ # Print the 10 slowest examples and example groups at the
+ # end of the spec run, to help surface which specs are running
+ # particularly slow.
+ config.profile_examples = 10
+
+ # Run specs in random order to surface order dependencies. If you find an
+ # order dependency and want to debug it, you can fix the order by providing
+ # the seed, which is printed after each run.
+ # --seed 1234
+ config.order = :random
+
+ # Seed global randomization in this process using the `--seed` CLI option.
+ # Setting this allows you to use `--seed` to deterministically reproduce
+ # test failures related to randomization by passing the same `--seed` value
+ # as the one that triggered the failure.
+ Kernel.srand config.seed
+end
+
+$LOAD_PATH << 'lib/coderay'
+
+require 'coderay'