summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKornelius Kalnbach <murphy@rubychan.de>2017-11-27 14:07:03 +0100
committerKornelius Kalnbach <murphy@rubychan.de>2017-11-27 14:07:03 +0100
commit434006c0a68d2884c05fe97dcee8db69e87a9a99 (patch)
treebeff12ea445f3847c6e5c26f12a76686000db508
parent3a703d2a0564bf9b98d5390a12f3ddc504453efa (diff)
downloadcoderay-dsl.tar.gz
testing rouge scannerdsl
-rw-r--r--lib/coderay/rouge_scanner.rb49
-rw-r--r--lib/coderay/rouge_scanner_dsl.rb199
-rw-r--r--lib/coderay/scanners.rb1
-rw-r--r--lib/coderay/scanners/_map.rb1
-rw-r--r--lib/coderay/scanners/java_script7.rb268
5 files changed, 518 insertions, 0 deletions
diff --git a/lib/coderay/rouge_scanner.rb b/lib/coderay/rouge_scanner.rb
new file mode 100644
index 0000000..b08d7fa
--- /dev/null
+++ b/lib/coderay/rouge_scanner.rb
@@ -0,0 +1,49 @@
+require 'set'
+require 'coderay/rouge_scanner_dsl'
+
+module CodeRay
+ module Scanners
+    # Base class for scanners that are declared with the state/rule DSL
+    # from RougeScannerDSL.
+    #
+    # The scanner loop is not written by hand: the first call to
+    # #scan_tokens generates it from the declared states and defines it as
+    # a method on the concrete scanner class.
+    class RougeScanner < Scanner
+      require 'rouge'
+      include Rouge::Token::Tokens
+
+      extend RougeScannerDSL
+
+      class << self
+        # Generates the scanner loop for this class and defines it as the
+        # instance method +scan_tokens+.
+        #
+        # The code is generated exactly once per call. Generating it
+        # registers the rule callbacks as a side effect, so generating it a
+        # second time for the debug output would register every callback
+        # again under a new name and print code that differs from the code
+        # that is evaluated.
+        #
+        # When the PUTS environment variable is set, the generated code and
+        # the number of registered callbacks are printed.
+        def define_scan_tokens!
+          generated_code = scan_tokens_code
+
+          if ENV['PUTS']
+            puts CodeRay.scan(generated_code, :ruby).terminal
+            puts "callbacks: #{callbacks.size}"
+          end
+
+          class_eval <<-RUBY
+def scan_tokens encoder, options
+ @encoder = encoder
+#{ generated_code.chomp.gsub(/^/, ' ') }
+end
+          RUBY
+        end
+      end
+
+      # Bootstrap implementation: defines the generated +scan_tokens+ on
+      # the scanner's class, then calls it.
+      def scan_tokens tokens, options
+        self.class.define_scan_tokens!
+
+        # Not a plain recursion: for a subclass, the method that was just
+        # defined on self.class is found before this one.
+        scan_tokens tokens, options
+      end
+
+      protected
+
+      # Every scan starts in the :root state.
+      def setup
+        @state = :root
+      end
+
+      # Called by the generated code after the input is consumed.
+      def close_groups encoder, states
+        # TODO
+      end
+
+      # For use in rule callbacks: emits the text matched by the current
+      # callback rule (@match) with the given token.
+      def token token
+        @encoder.text_token @match, token
+      end
+    end
+ end
+end \ No newline at end of file
diff --git a/lib/coderay/rouge_scanner_dsl.rb b/lib/coderay/rouge_scanner_dsl.rb
new file mode 100644
index 0000000..38b06f5
--- /dev/null
+++ b/lib/coderay/rouge_scanner_dsl.rb
@@ -0,0 +1,199 @@
+require 'set'
+
+module CodeRay
+ module Scanners
+ module RougeScannerDSL
+ NoStatesError = Class.new StandardError
+
+      # A named scanner state: an ordered list of rules, collected by
+      # evaluating the block given to the constructor in the context of the
+      # new State.
+      State = Struct.new :name, :rules do
+        def initialize(name, &block)
+          super(name, [])
+          instance_eval(&block)
+        end
+
+        # Returns the +when+ branch for this state as Ruby source. Input
+        # that no rule matches is consumed one character at a time as :error.
+        def code scanner
+          indented_rules = rules_code(scanner).chomp.gsub(/^/, ' ')
+
+          <<-RUBY
+when #{name.inspect}
+#{ indented_rules }
+ else
+ encoder.text_token getch, :error
+ end
+          RUBY
+        end
+
+        # Returns the if/elsif chain for all rules of this state. +first+
+        # tells whether the chain starts here (+if+) or continues an
+        # enclosing chain (+elsif+), as it does for mixed-in states.
+        def rules_code scanner, first: true
+          if rules.empty?
+            raise 'no rules defined for %p' % [self]
+          end
+
+          sources = rules.each_with_index.map do |entry, index|
+            index.zero? ? entry.code(scanner, first: first) : entry.code(scanner)
+          end
+
+          sources.join
+        end
+
+        protected
+
+        # DSL
+
+        # Adds a rule for +pattern+: either a token class (optionally with
+        # a next state), or a callback block.
+        def rule pattern, token = nil, next_state = nil, &block
+          if !token && !block
+            raise 'please pass `rule` a token to yield or a callback'
+          end
+
+          return rules << CallbackRule.new(pattern, block) if token.nil?
+
+          unless token.is_a?(Class) && token < Rouge::Token
+            raise "invalid token: #{token.inspect}"
+          end
+
+          unless next_state.nil? || next_state.is_a?(Symbol)
+            raise "invalid next state: #{next_state.inspect}"
+          end
+
+          new_rule = next_state ? Rule.new(pattern, token, next_state) : Rule.new(pattern, token)
+          rules << new_rule
+        end
+
+        # Includes the rules of another state at this position.
+        def mixin state_name
+          rules.push Mixin.new(state_name)
+        end
+      end
+
+      # A rule that emits a fixed token for a pattern and optionally
+      # changes state: :pop! leaves the current state, any other Symbol
+      # enters the state of that name.
+      Rule = Struct.new :pattern, :token, :action do
+        def initialize(pattern, token, action = nil)
+          super(pattern, token, action)
+        end
+
+        # Returns the if/elsif branch for this rule as Ruby source.
+        def code scanner, first: false
+          keyword = first ? 'if' : 'elsif'
+          token_path = token.token_chain.map(&:name).join('::')
+
+          branch = <<-RUBY
+#{keyword} match = scan(#{pattern.inspect})
+ encoder.text_token match, #{token_path}
+          RUBY
+
+          branch + action_code.to_s
+        end
+
+        # Returns the state-changing code for the action, or nil when the
+        # rule has no action.
+        def action_code
+          if action == :pop!
+            <<-RUBY
+ states.pop
+ state = states.last
+            RUBY
+          elsif action.is_a?(Symbol)
+            <<-RUBY
+ state = #{action.inspect}
+ states << state
+            RUBY
+          end
+        end
+      end
+
+      # A rule whose action is a block. The block is registered on the
+      # scanner as a method, and the generated branch calls that method
+      # after storing the matched text in @match.
+      CallbackRule = Struct.new :pattern, :callback do
+        def code scanner, first: false
+          keyword = first ? 'if' : 'elsif'
+          pattern_source = pattern.inspect
+          callback_call = scanner.add_callback(callback)
+
+          <<-RUBY
+#{keyword} match = scan(#{pattern_source})
+ @match = match
+ #{callback_call}
+          RUBY
+        end
+      end
+
+      # A placeholder that expands to the rules of another state of the
+      # same scanner.
+      Mixin = Struct.new(:state_name) do
+        # Returns the code of the rules of the referenced state.
+        #
+        # Raises a RuntimeError that names the missing state when the
+        # scanner has no state called +state_name+ (previously this failed
+        # with a NoMethodError on nil).
+        def code scanner, first: false
+          included_state = scanner.states[state_name]
+
+          unless included_state
+            raise "unknown state in mixin: #{state_name.inspect}"
+          end
+
+          included_state.rules_code(scanner, first: first)
+        end
+      end
+
+ attr_accessor :states
+
+      # DSL: defines (or replaces) the state +name+ from the rules
+      # declared in the block. Returns the new State.
+      def state name, &block
+        (@states ||= {}).store(name, State.new(name, &block))
+      end
+
+      # Defines +block+ as a method and returns the Ruby source of a call
+      # to it.
+      #
+      # The method name is derived from the line the block is defined on;
+      # a suffix (_a, _b, ...) is appended until the name is unused. The
+      # block's parameter names become the arguments of the call and are
+      # registered as variables; a parameter named +m+ is passed the local
+      # +match+.
+      def add_callback block
+        base_name = "__callback_line_#{block.source_location.last}"
+
+        suffix = nil
+        callback_name = base_name
+        while callbacks.key?(callback_name)
+          suffix = suffix ? suffix.succ : 'a'
+          callback_name = [base_name, suffix].join('_')
+        end
+
+        callbacks.store(callback_name, define_method(callback_name, &block))
+
+        parameters = block.parameters
+        return callback_name if parameters.empty?
+
+        argument_names = parameters.map do |type, name|
+          unless %i(req opt).include?(type)
+            raise "callbacks don't allow rest parameters: %p" % [parameters]
+          end
+
+          name == :m ? :match : name
+        end
+
+        variables.merge(argument_names)
+
+        "#{callback_name}(#{argument_names.join(', ')})"
+      end
+
+      # Registers +name+ as a variable of the generated scanner code.
+      def add_variable name
+        variables.add(name)
+      end
+
+ protected
+
+      # Registered callbacks, keyed by method name; created on first use.
+      def callbacks
+        @callbacks = {} unless defined?(@callbacks) && @callbacks
+        @callbacks
+      end
+
+      # Names of the variables used by the generated code; created on
+      # first use.
+      def variables
+        @variables = Set.new unless defined?(@variables) && @variables
+        @variables
+      end
+
+      # The registered variables without the names that the generated
+      # scanner code already defines itself.
+      def additional_variables
+        predefined = %i(encoder options state states match kind)
+        variables.difference(predefined)
+      end
+
+      # Returns the Ruby source of the body of the generated +scan_tokens+
+      # method: state setup, the scan loop with one +when+ branch per
+      # state, and the wrap-up.
+      #
+      # Raises NoStatesError when no states are defined.
+      def scan_tokens_code
+        # Generate the state branches first. Generating them registers the
+        # parameters of rule callbacks as variables, and these have to be
+        # known before the code that restores the variables is built;
+        # otherwise the first generated version lacks those assignments.
+        state_branches = states_code.chomp.gsub(/^/, ' ')
+        restore_variables = restore_local_variables_code
+
+        <<-"RUBY"
+state = options[:state] || @state
+states = [state]
+#{ restore_variables }
+until eos?
+ case state
+#{ state_branches }
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ end
+end
+
+@state = state if options[:keep_state]
+
+close_groups(encoder, states)
+
+encoder
+        RUBY
+      end
+
+      # Returns one "name = @name" assignment per additional variable, in
+      # sorted order, one per line.
+      def restore_local_variables_code
+        assignments = additional_variables.sort.map do |name|
+          "#{name} = @#{name}"
+        end
+
+        assignments.join("\n")
+      end
+
+      # Returns the concatenated +when+ branches of all defined states.
+      #
+      # Raises NoStatesError when no states are defined. This module is
+      # used via +extend+, so +self+ is the scanner class; the message
+      # names it (self.class would always be Class).
+      def states_code
+        defined_states = defined?(@states) ? @states : nil
+
+        if defined_states.nil? || defined_states.empty?
+          raise NoStatesError, 'no states defined for %p' % [self]
+        end
+
+        defined_states.values.map { |state| state.code(self) }.join
+      end
+ end
+ end
+end \ No newline at end of file
diff --git a/lib/coderay/scanners.rb b/lib/coderay/scanners.rb
index 0935458..5892f52 100644
--- a/lib/coderay/scanners.rb
+++ b/lib/coderay/scanners.rb
@@ -26,6 +26,7 @@ module CodeRay
autoload :RuleBasedScanner, CodeRay.coderay_path('rule_based_scanner')
autoload :SingleStateRuleBasedScanner, CodeRay.coderay_path('single_state_rule_based_scanner')
autoload :StateBasedScanner, CodeRay.coderay_path('state_based_scanner')
+ autoload :RougeScanner, CodeRay.coderay_path('rouge_scanner')
autoload :SimpleScanner, CodeRay.coderay_path('simple_scanner')
end
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb
index 82fb17f..4d836e6 100644
--- a/lib/coderay/scanners/_map.rb
+++ b/lib/coderay/scanners/_map.rb
@@ -16,6 +16,7 @@ module Scanners
:javascript4 => :java_script4,
:javascript5 => :java_script5,
:javascript6 => :java_script6,
+ :javascript7 => :java_script7,
:js => :java_script,
:pascal => :delphi,
:patch => :diff,
diff --git a/lib/coderay/scanners/java_script7.rb b/lib/coderay/scanners/java_script7.rb
new file mode 100644
index 0000000..082a781
--- /dev/null
+++ b/lib/coderay/scanners/java_script7.rb
@@ -0,0 +1,268 @@
+# Trying to imitate https://github.com/jneen/rouge/blob/master/lib/rouge/lexers/javascript.rb.
+module CodeRay
+module Scanners
+
+  # Scanner for JavaScript, written with the RougeScanner DSL.
+  #
+  # Registered as +java_script7+; alias: +javascript7+
+ class JavaScript7 < RougeScanner
+ register_for :java_script7
+ file_extension 'js'
+
+    # Inside a /* ... */ comment; the closing */ leaves the state.
+    state(:multiline_comment) do
+      rule(%r([*]/), Comment::Multiline, :pop!)
+      rule(%r([^*/]+), Comment::Multiline)
+      rule(%r([*/]), Comment::Multiline)
+    end
+
+    # Whitespace and the comment forms; mixed into other states.
+    state(:comments_and_whitespace) do
+      rule(/\s+/, Text)
+      rule(/<!--/, Comment) # really...?
+      rule(%r(//.*?$), Comment::Single)
+      rule(%r(/[*]), Comment::Multiline, :multiline_comment)
+    end
+
+    # Start of an expression: here a slash begins a regex and an opening
+    # brace begins an object literal.
+    state(:expr_start) do
+      mixin(:comments_and_whitespace)
+
+      rule(%r(/)) {
+        token(Str::Regex)
+        goto(:regex)
+      }
+
+      rule(/[{]/) {
+        token(Punctuation)
+        goto(:object)
+      }
+
+      # Empty pattern: matches when nothing above did and leaves the state.
+      rule(//, Text, :pop!)
+    end
+
+    # Body of a regex literal, up to the closing slash.
+    state(:regex) do
+      rule(%r(/)) {
+        token(Str::Regex)
+        goto(:regex_end)
+      }
+
+      # A line break inside the literal is an error and ends it.
+      rule(%r([^/]\n), Error, :pop!)
+
+      rule(/\n/, Error, :pop!)
+      rule(/\[\^/, Str::Escape, :regex_group)
+      rule(/\[/, Str::Escape, :regex_group)
+      rule(/\\./, Str::Escape)
+      rule(%r{[(][?][:=<!]}, Str::Escape)
+      rule(/[{][\d,]+[}]/, Str::Escape)
+      rule(/[()?]/, Str::Escape)
+      rule(/./, Str::Regex)
+    end
+
+    # Directly after the closing slash of a regex: optional flags.
+    state(:regex_end) do
+      rule(/[gim]+/, Str::Regex, :pop!)
+      rule(//) do
+        pop!
+      end
+    end
+
+    # Inside a [...] character class of a regex literal.
+    state(:regex_group) do
+      # specially highlight / in a group to indicate that it doesn't
+      # close the regex
+      rule(/\//, Str::Escape)
+
+      rule(%r([^/]\n)) {
+        token(Error)
+        pop!(2)
+      }
+
+      rule(/\]/, Str::Escape, :pop!)
+      rule(/\\./, Str::Escape)
+      rule(/./, Str::Regex)
+    end
+
+    # Marks the rest of the line as an error, then leaves the state.
+    state(:bad_regex) do
+      rule(/[^\n]+/, Error, :pop!)
+    end
+
+    # Identifiers that the root state highlights as Keyword.
+    def self.keywords
+      @keywords ||= %w(
+        for in of while do break return continue switch case default
+        if else throw try catch finally new delete typeof instanceof
+        void this yield import export from as async super this
+      ).to_set
+    end
+
+    # Identifiers that the root state highlights as Keyword::Declaration.
+    def self.declarations
+      @declarations ||= %w(
+        var let const with function class
+        extends constructor get set
+      ).to_set
+    end
+
+    # Identifiers that the root state highlights as Keyword::Reserved.
+    def self.reserved
+      @reserved ||= %w(
+        abstract boolean byte char debugger double enum
+        final float goto implements int interface
+        long native package private protected public short static
+        synchronized throws transient volatile
+        eval arguments await
+      ).to_set
+    end
+
+    # Identifiers that the root state highlights as Keyword::Constant.
+    def self.constants
+      @constants ||= %w(true false null NaN Infinity undefined).to_set
+    end
+
+    # Identifiers that the root state highlights as Name::Builtin.
+    #
+    # Returned as a Set, like the other word lists of this class, so that
+    # the membership test made for every identifier does not walk an
+    # Array. The duplicate "Error" entry of the list is dropped.
+    def self.builtins
+      @builtins ||= Set.new %w(
+        Array Boolean Date Error Function Math netscape
+        Number Object Packages RegExp String sun decodeURI
+        decodeURIComponent encodeURI encodeURIComponent
+        eval isFinite isNaN parseFloat parseInt
+        document window navigator self global
+        Promise Set Map WeakSet WeakMap Symbol Proxy Reflect
+        Int8Array Uint8Array Uint8ClampedArray
+        Int16Array Uint16Array Uint16ClampedArray
+        Int32Array Uint32Array Uint32ClampedArray
+        Float32Array Float64Array DataView ArrayBuffer
+      )
+    end
+
+    # Pattern for an identifier: a letter, $ or _ followed by letters,
+    # digits or underscores, matched case-insensitively.
+    def self.id_regex
+      @id_regex ||= /[$a-z_][a-z0-9_]*/io
+    end
+
+ id = self.id_regex
+
+    # The top-level state: operators, punctuation, identifiers, numbers
+    # and the openers of strings and template strings.
+    state(:root) do
+      rule(/\A\s*#!.*?\n/m, Comment::Preproc, :statement)
+      rule(%r((?<=\n)(?=\s|/|<!--)), Text, :expr_start)
+      mixin(:comments_and_whitespace)
+      rule(%r(\+\+ | -- | ~ | && | \|\| | \\(?=\n) | << | >>>? | ===
+      | !== )x,
+        Operator, :expr_start)
+      rule(%r([-<>+*%&|\^/!=]=?), Operator, :expr_start)
+      rule(/[(\[,]/, Punctuation, :expr_start)
+      rule(/;/, Punctuation, :statement)
+      rule(/[)\].]/, Punctuation)
+
+      rule(/`/) do
+        token Str::Double
+        push :template_string
+      end
+
+      rule(/[?]/) do
+        token Punctuation
+        push :ternary
+        push :expr_start
+      end
+
+      rule(/(\@)(\w+)?/) do
+        groups Punctuation, Name::Decorator
+        push :expr_start
+      end
+
+      rule(/[{}]/, Punctuation, :statement)
+
+      # The parameter has to be called +m+: the scanner DSL passes the
+      # current match to a callback parameter of that name.
+      rule(id) do |m|
+        # The generated code passes the result of +scan+ (presumably a
+        # String, as with StringScanner#scan -- confirm). m[0] on a String
+        # is only its first character, so no word list would ever match;
+        # #to_s gives the whole matched text for a String and for a
+        # MatchData alike.
+        word = m.to_s
+
+        if self.class.keywords.include? word
+          token Keyword
+          push :expr_start
+        elsif self.class.declarations.include? word
+          token Keyword::Declaration
+          push :expr_start
+        elsif self.class.reserved.include? word
+          token Keyword::Reserved
+        elsif self.class.constants.include? word
+          token Keyword::Constant
+        elsif self.class.builtins.include? word
+          token Name::Builtin
+        else
+          token Name::Other
+        end
+      end
+
+      rule(/[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?/, Num::Float)
+      rule(/0x[0-9a-fA-F]+/i, Num::Hex)
+      rule(/0o[0-7][0-7_]*/i, Num::Oct)
+      rule(/0b[01][01_]*/i, Num::Bin)
+      rule(/[0-9]+/, Num::Integer)
+
+      rule(/"/, Str::Double, :dq)
+      rule(/'/, Str::Single, :sq)
+      rule(/:/, Punctuation)
+    end
+
+    # Inside a double-quoted string.
+    state(:dq) do
+      rule(/[^\\"]+/, Str::Double)
+      rule(/\\"/, Str::Escape)
+      rule(/"/, Str::Double, :pop!)
+    end
+
+    # Inside a single-quoted string.
+    state(:sq) do
+      rule(/[^\\']+/, Str::Single)
+      rule(/\\'/, Str::Escape)
+      rule(/'/, Str::Single, :pop!)
+    end
+
+ # braced parts that aren't object literals
+    # Start of a statement; here "name:" is a label.
+    state(:statement) do
+      rule(/case\b/) {
+        token(Keyword)
+        goto(:expr_start)
+      }
+
+      rule(/(#{id})(\s*)(:)/) {
+        groups(Name::Label, Text, Punctuation)
+      }
+
+      rule(/[{}]/, Punctuation)
+
+      mixin(:expr_start)
+    end
+
+ # object literals
+    # Inside an object literal; here "name:" is an attribute name.
+    state(:object) do
+      mixin(:comments_and_whitespace)
+
+      rule(/[{]/) {
+        token(Punctuation)
+        push
+      }
+
+      rule(/[}]/) {
+        token(Punctuation)
+        goto(:statement)
+      }
+
+      rule(/(#{id})(\s*)(:)/) {
+        groups(Name::Attribute, Text, Punctuation)
+        push(:expr_start)
+      }
+
+      rule(/:/, Punctuation)
+      mixin(:root)
+    end
+
+ # ternary expressions, where <id>: is not a label!
+    # After the ? of a ternary expression, where <id>: is not a label.
+    state(:ternary) do
+      rule(/:/) {
+        token(Punctuation)
+        goto(:expr_start)
+      }
+
+      mixin(:root)
+    end
+
+ # template strings
+    # Inside a backtick template string; ${ starts an embedded expression.
+    state(:template_string) do
+      rule(/\${/, Punctuation, :template_string_expr)
+      rule(/`/, Str::Double, :pop!)
+      rule(/(\\\\|\\[\$`]|[^\$`]|\$(?!{))*/, Str::Double)
+    end
+
+    # Inside ${ ... } of a template string; the closing brace leaves it.
+    state(:template_string_expr) do
+      rule(/}/, Punctuation, :pop!)
+      mixin(:root)
+    end
+
+ end
+
+end
+end