# Compiles the state machine described via the scanner DSL into a real
# +scan_tokens(encoder, options)+ instance method on the receiving class.
#
# The generated method stores the encoder in <tt>@encoder</tt> and then runs
# the code returned by +scan_tokens_code+.
#
# When the +PUTS+ environment variable is set, the generated code (highlighted
# for the terminal by CodeRay itself) and the number of registered callbacks
# are printed first, as a debugging aid.
def define_scan_tokens!
  if ENV['PUTS']
    puts CodeRay.scan(scan_tokens_code, :ruby).terminal
    puts "callbacks: #{callbacks.size}"
  end

  # Pass file and line so that errors raised inside the generated method
  # carry a backtrace pointing here instead of at an anonymous "(eval)".
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
def scan_tokens encoder, options
  @encoder = encoder
#{ scan_tokens_code.chomp.gsub(/^/, ' ') }
end
  RUBY
end
+ + scan_tokens tokens, options + end + + protected + + def setup + @state = :root + end + + def close_groups encoder, states + # TODO + end + + def token token + @encoder.text_token @match, token + end + end + end +end \ No newline at end of file diff --git a/lib/coderay/rouge_scanner_dsl.rb b/lib/coderay/rouge_scanner_dsl.rb new file mode 100644 index 0000000..38b06f5 --- /dev/null +++ b/lib/coderay/rouge_scanner_dsl.rb @@ -0,0 +1,199 @@ +require 'set' + +module CodeRay + module Scanners + module RougeScannerDSL + NoStatesError = Class.new StandardError + + State = Struct.new :name, :rules do + def initialize(name, &block) + super name, [] + + instance_eval(&block) + end + + def code scanner + <<-RUBY +when #{name.inspect} +#{ rules_code(scanner).chomp.gsub(/^/, ' ') } + else + encoder.text_token getch, :error + end + RUBY + end + + def rules_code scanner, first: true + raise 'no rules defined for %p' % [self] if rules.empty? + + [ + rules.first.code(scanner, first: first), + *rules.drop(1).map { |rule| rule.code(scanner) } + ].join + end + + protected + + # DSL + + def rule pattern, token = nil, next_state = nil, &block + unless token || block + raise 'please pass `rule` a token to yield or a callback' + end + + case token + when Class + unless token < Rouge::Token + raise "invalid token: #{token.inspect}" + end + + case next_state + when Symbol + rules << Rule.new(pattern, token, next_state) + when nil + rules << Rule.new(pattern, token) + else + raise "invalid next state: #{next_state.inspect}" + end + when nil + rules << CallbackRule.new(pattern, block) + else + raise "invalid token: #{token.inspect}" + end + end + + def mixin state_name + rules << Mixin.new(state_name) + end + end + + Rule = Struct.new :pattern, :token, :action do + def initialize(pattern, token, action = nil) + super + end + + def code scanner, first: false + <<-RUBY + action_code.to_s +#{'els' unless first}if match = scan(#{pattern.inspect}) + encoder.text_token match, 
#{token.token_chain.map(&:name).join('::')} + RUBY + end + + def action_code + case action + when :pop! + <<-RUBY + states.pop + state = states.last + RUBY + when Symbol + <<-RUBY + state = #{action.inspect} + states << state + RUBY + end + end + end + + CallbackRule = Struct.new :pattern, :callback do + def code scanner, first: false + <<-RUBY +#{'els' unless first}if match = scan(#{pattern.inspect}) + @match = match + #{scanner.add_callback(callback)} + RUBY + end + end + + Mixin = Struct.new(:state_name) do + def code scanner, first: false + scanner.states[state_name].rules_code(scanner, first: first) + end + end + + attr_accessor :states + + def state name, &block + @states ||= {} + @states[name] = State.new(name, &block) + end + + def add_callback block + base_name = "__callback_line_#{block.source_location.last}" + callback_name = base_name + counter = 'a' + while callbacks.key?(callback_name) + callback_name = "#{base_name}_#{counter}" + counter = counter.succ + end + + callbacks[callback_name] = define_method(callback_name, &block) + + parameters = block.parameters + + if parameters.empty? + callback_name + else + parameter_names = parameters.map do |type, name| + raise "callbacks don't allow rest parameters: %p" % [parameters] unless type == :req || type == :opt + name = :match if name == :m + name + end + + parameter_names.each { |name| variables << name } + "#{callback_name}(#{parameter_names.join(', ')})" + end + end + + def add_variable name + variables << name + end + + protected + + def callbacks + @callbacks ||= {} + end + + def variables + @variables ||= Set.new + end + + def additional_variables + variables - %i(encoder options state states match kind) + end + + def scan_tokens_code + <<-"RUBY" +state = options[:state] || @state +states = [state] +#{ restore_local_variables_code } +until eos? 
+ case state +#{ states_code.chomp.gsub(/^/, ' ') } + else + raise_inspect 'Unknown state: %p' % [state], encoder + end +end + +@state = state if options[:keep_state] + +close_groups(encoder, states) + +encoder + RUBY + end + + def restore_local_variables_code + additional_variables.sort.map { |name| "#{name} = @#{name}" }.join("\n") + end + + def states_code + unless defined?(@states) && !@states.empty? + raise NoStatesError, 'no states defined for %p' % [self.class] + end + + @states.values.map { |state| state.code(self) }.join + end + end + end +end \ No newline at end of file diff --git a/lib/coderay/scanners.rb b/lib/coderay/scanners.rb index 0935458..5892f52 100644 --- a/lib/coderay/scanners.rb +++ b/lib/coderay/scanners.rb @@ -26,6 +26,7 @@ module CodeRay autoload :RuleBasedScanner, CodeRay.coderay_path('rule_based_scanner') autoload :SingleStateRuleBasedScanner, CodeRay.coderay_path('single_state_rule_based_scanner') autoload :StateBasedScanner, CodeRay.coderay_path('state_based_scanner') + autoload :RougeScanner, CodeRay.coderay_path('rouge_scanner') autoload :SimpleScanner, CodeRay.coderay_path('simple_scanner') end diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index 82fb17f..4d836e6 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -16,6 +16,7 @@ module Scanners :javascript4 => :java_script4, :javascript5 => :java_script5, :javascript6 => :java_script6, + :javascript7 => :java_script7, :js => :java_script, :pascal => :delphi, :patch => :diff, diff --git a/lib/coderay/scanners/java_script7.rb b/lib/coderay/scanners/java_script7.rb new file mode 100644 index 0000000..082a781 --- /dev/null +++ b/lib/coderay/scanners/java_script7.rb @@ -0,0 +1,268 @@ +# Trying to imitate https://github.com/jneen/rouge/blob/master/lib/rouge/lexers/javascript.rb. +module CodeRay +module Scanners + + # Scanner for JavaScript. 
+ # + # Aliases: +ecmascript+, +ecma_script+, +javascript+ + class JavaScript7 < RougeScanner + register_for :java_script7 + file_extension 'js' + + state :multiline_comment do + rule %r([*]/), Comment::Multiline, :pop! + rule %r([^*/]+), Comment::Multiline + rule %r([*/]), Comment::Multiline + end + + state :comments_and_whitespace do + rule /\s+/, Text + rule /