summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners
diff options
context:
space:
mode:
authorKornelius Kalnbach <murphy@rubychan.de>2016-02-13 15:57:11 +0100
committerKornelius Kalnbach <murphy@rubychan.de>2016-02-13 15:57:11 +0100
commit8d908474eff99a0a653bbc94f459537d56b83e92 (patch)
tree4b066ea50ca5f95f07782d0c58f8081c4c3ef39f /lib/coderay/scanners
parentbf395bd14ed7f63aab84955f6768f59965e3a73e (diff)
parent0a1f500d524ff0fb5eeafef051ccbb641954a87a (diff)
downloadcoderay-8d908474eff99a0a653bbc94f459537d56b83e92.tar.gz
Merge branch 'master' into cleanup-outputcleanup-output
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r--lib/coderay/scanners/debug.rb26
-rw-r--r--lib/coderay/scanners/diff.rb2
-rw-r--r--lib/coderay/scanners/go.rb1
-rw-r--r--lib/coderay/scanners/raydebug.rb31
-rw-r--r--lib/coderay/scanners/ruby.rb19
-rw-r--r--lib/coderay/scanners/ruby/patterns.rb7
-rw-r--r--lib/coderay/scanners/ruby/string_state.rb22
-rw-r--r--lib/coderay/scanners/sql.rb42
8 files changed, 89 insertions, 61 deletions
diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb
index 566bfa7..83ede9a 100644
--- a/lib/coderay/scanners/debug.rb
+++ b/lib/coderay/scanners/debug.rb
@@ -1,9 +1,11 @@
+require 'set'
+
module CodeRay
module Scanners
# = Debug Scanner
#
- # Interprets the output of the Encoders::Debug encoder.
+ # Interprets the output of the Encoders::Debug encoder (basically the inverse function).
class Debug < Scanner
register_for :debug
@@ -11,6 +13,11 @@ module Scanners
protected
+ def setup
+ super
+ @known_token_kinds = TokenKinds.keys.map(&:to_s).to_set
+ end
+
def scan_tokens encoder, options
opened_tokens = []
@@ -21,16 +28,19 @@ module Scanners
encoder.text_token match, :space
elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x)
- kind = self[1].to_sym
- match = self[2].gsub(/\\(.)/m, '\1')
- unless TokenKinds.has_key? kind
- kind = :error
- match = matched
+ if @known_token_kinds.include? self[1]
+ encoder.text_token self[2].gsub(/\\(.)/m, '\1'), self[1].to_sym
+ else
+ encoder.text_token matched, :unknown
end
- encoder.text_token match, kind
elsif match = scan(/ (\w+) ([<\[]) /x)
- kind = self[1].to_sym
+ if @known_token_kinds.include? self[1]
+ kind = self[1].to_sym
+ else
+ kind = :unknown
+ end
+
opened_tokens << kind
case self[2]
when '<'
diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb
index fd1aed6..74a6c27 100644
--- a/lib/coderay/scanners/diff.rb
+++ b/lib/coderay/scanners/diff.rb
@@ -100,7 +100,7 @@ module Scanners
next
elsif match = scan(/-/)
deleted_lines_count += 1
- if options[:inline_diff] && deleted_lines_count == 1 && (changed_lines_count = 1 + check(/.*(?:\n\-.*)*/).count("\n")) && match?(/(?>.*(?:\n\-.*){#{changed_lines_count - 1}}(?:\n\+.*){#{changed_lines_count}})$(?!\n\+)/)
+ if options[:inline_diff] && deleted_lines_count == 1 && (changed_lines_count = 1 + check(/.*(?:\n\-.*)*/).count("\n")) && changed_lines_count <= 100_000 && match?(/(?>.*(?:\n\-.*){#{changed_lines_count - 1}}(?:\n\+.*){#{changed_lines_count}})$(?!\n\+)/)
deleted_lines = Array.new(changed_lines_count) { |i| skip(/\n\-/) if i > 0; scan(/.*/) }
inserted_lines = Array.new(changed_lines_count) { |i| skip(/\n\+/) ; scan(/.*/) }
diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb
index 59473f6..99fdd63 100644
--- a/lib/coderay/scanners/go.rb
+++ b/lib/coderay/scanners/go.rb
@@ -1,7 +1,6 @@
module CodeRay
module Scanners
- # Scanner for Go, copy from c
class Go < Scanner
register_for :go
diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb
index d39d962..1effdc8 100644
--- a/lib/coderay/scanners/raydebug.rb
+++ b/lib/coderay/scanners/raydebug.rb
@@ -1,9 +1,11 @@
+require 'set'
+
module CodeRay
module Scanners
- # = Debug Scanner
+ # = Raydebug Scanner
#
- # Parses the output of the Encoders::Debug encoder.
+ # Highlights the output of the Encoders::Debug encoder.
class Raydebug < Scanner
register_for :raydebug
@@ -12,6 +14,11 @@ module Scanners
protected
+ def setup
+ super
+ @known_token_kinds = TokenKinds.keys.map(&:to_s).to_set
+ end
+
def scan_tokens encoder, options
opened_tokens = []
@@ -26,20 +33,22 @@ module Scanners
encoder.text_token kind, :class
encoder.text_token '(', :operator
match = self[2]
- encoder.text_token match, kind.to_sym unless match.empty?
+ unless match.empty?
+ if @known_token_kinds.include? kind
+ encoder.text_token match, kind.to_sym
+ else
+ encoder.text_token match, :plain
+ end
+ end
encoder.text_token match, :operator if match = scan(/\)/)
elsif match = scan(/ (\w+) ([<\[]) /x)
- kind = self[1]
- case self[2]
- when '<'
- encoder.text_token kind, :class
- when '['
- encoder.text_token kind, :class
+ encoder.text_token self[1], :class
+ if @known_token_kinds.include? self[1]
+ kind = self[1].to_sym
else
- raise 'CodeRay bug: This case should not be reached.'
+ kind = :unknown
end
- kind = kind.to_sym
opened_tokens << kind
encoder.begin_group kind
encoder.text_token self[2], :operator
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index 80165ca..5b8de42 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -164,15 +164,19 @@ module Scanners
end
elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
- encoder.begin_group :string
if match.size == 1
+ kind = check(self.class::StringState.simple_key_pattern(match)) ? :key : :string
+ encoder.begin_group kind
encoder.text_token match, :delimiter
- state = self.class::StringState.new :string, match == '"', match # important for streaming
+ state = self.class::StringState.new kind, match == '"', match # important for streaming
else
+ kind = value_expected == true && scan(/:/) ? :key : :string
+ encoder.begin_group kind
encoder.text_token match[0,1], :delimiter
encoder.text_token match[1..-2], :content if match.size > 2
encoder.text_token match[-1,1], :delimiter
- encoder.end_group :string
+ encoder.end_group kind
+ encoder.text_token ':', :operator if kind == :key
value_expected = false
end
@@ -191,11 +195,14 @@ module Scanners
encoder.text_token match, :error
method_call_expected = false
else
- encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
+ kind = self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
+ match << 'r' if match !~ /e/i && scan(/r/)
+ match << 'i' if scan(/i/)
+ encoder.text_token match, kind
end
value_expected = false
- elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
+ elsif match = scan(/ [-+!~^\/]=? | [:;] | &\. | [*|&]{1,2}=? | >>? /x)
value_expected = true
encoder.text_token match, :operator
@@ -208,7 +215,7 @@ module Scanners
encoder.end_group kind
heredocs ||= [] # create heredocs if empty
heredocs << self.class::StringState.new(kind, quote != "'", delim,
- self[1] == '-' ? :indented : :linestart)
+ self[1] ? :indented : :linestart)
value_expected = false
elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb
index ed071d2..e5a156d 100644
--- a/lib/coderay/scanners/ruby/patterns.rb
+++ b/lib/coderay/scanners/ruby/patterns.rb
@@ -114,7 +114,7 @@ module Scanners
# NOTE: This is not completely correct, but
# nobody needs heredoc delimiters ending with \n.
HEREDOC_OPEN = /
- << (-)? # $1 = float
+ << ([-~])? # $1 = float
(?:
( [A-Za-z_0-9]+ ) # $2 = delim
|
@@ -157,13 +157,16 @@ module Scanners
yield
])
- FANCY_STRING_START = / % ( [QqrsWwx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x
+ FANCY_STRING_START = / % ( [iIqQrswWx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x
FANCY_STRING_KIND = Hash.new(:string).merge({
+ 'i' => :symbol,
+ 'I' => :symbol,
'r' => :regexp,
's' => :symbol,
'x' => :shell,
})
FANCY_STRING_INTERPRETED = Hash.new(true).merge({
+ 'i' => false,
'q' => false,
's' => false,
'w' => false,
diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb
index 2f398d1..95f1e83 100644
--- a/lib/coderay/scanners/ruby/string_state.rb
+++ b/lib/coderay/scanners/ruby/string_state.rb
@@ -16,7 +16,6 @@ module Scanners
STRING_PATTERN = Hash.new do |h, k|
delim, interpreted = *k
- # delim = delim.dup # workaround for old Ruby
delim_pattern = Regexp.escape(delim)
if closing_paren = CLOSING_PAREN[delim]
delim_pattern << Regexp.escape(closing_paren)
@@ -29,12 +28,21 @@ module Scanners
# '| [|?*+(){}\[\].^$]'
# end
- h[k] =
- if interpreted && delim != '#'
- / (?= [#{delim_pattern}] | \# [{$@] ) /mx
- else
- / (?= [#{delim_pattern}] ) /mx
- end
+ if interpreted && delim != '#'
+ / (?= [#{delim_pattern}] | \# [{$@] ) /mx
+ else
+ / (?= [#{delim_pattern}] ) /mx
+ end.tap do |pattern|
+ h[k] = pattern if (delim.respond_to?(:ord) ? delim.ord : delim[0]) < 256
+ end
+ end
+
+ def self.simple_key_pattern delim
+ if delim == "'"
+ / (?> (?: [^\\']+ | \\. )* ) ' : /mx
+ else
+ / (?> (?: [^\\"\#]+ | \\. | \#\$[\\"] | \#\{[^\{\}]+\} | \#(?!\{) )* ) " : /mx
+ end
end
def initialize kind, interpreted, delim, heredoc = false
diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb
index 93aeaf3..7d57f77 100644
--- a/lib/coderay/scanners/sql.rb
+++ b/lib/coderay/scanners/sql.rb
@@ -57,6 +57,12 @@ module Scanners
STRING_PREFIXES = /[xnb]|_\w+/i
+ STRING_CONTENT_PATTERN = {
+ '"' => / (?: [^\\"] | "" )+ /x,
+ "'" => / (?: [^\\'] | '' )+ /x,
+ '`' => / (?: [^\\`] | `` )+ /x,
+ }
+
def scan_tokens encoder, options
state = :initial
@@ -90,7 +96,7 @@ module Scanners
state = :string
encoder.text_token match, :delimiter
- elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
+ elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9\$]* /x)
encoder.text_token match, name_expected ? :ident : (match[0] == ?@ ? :variable : IDENT_KIND[match])
name_expected = false
@@ -115,40 +121,26 @@ module Scanners
end
elsif state == :string
- if match = scan(/[^\\"'`]+/)
- string_content << match
- next
+ if match = scan(STRING_CONTENT_PATTERN[string_type])
+ encoder.text_token match, :content
elsif match = scan(/["'`]/)
if string_type == match
if peek(1) == string_type # doubling means escape
- string_content << string_type << getch
- next
- end
- unless string_content.empty?
- encoder.text_token string_content, :content
- string_content = ''
+ encoder.text_token match + getch, :content
+ else
+ encoder.text_token match, :delimiter
+ encoder.end_group :string
+ state = :initial
+ string_type = nil
end
- encoder.text_token match, :delimiter
- encoder.end_group :string
- state = :initial
- string_type = nil
else
- string_content << match
+ encoder.text_token match, :content
end
elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
- unless string_content.empty?
- encoder.text_token string_content, :content
- string_content = ''
- end
encoder.text_token match, :char
elsif match = scan(/ \\ . /mox)
- string_content << match
- next
+ encoder.text_token match, :content
elsif match = scan(/ \\ | $ /x)
- unless string_content.empty?
- encoder.text_token string_content, :content
- string_content = ''
- end
encoder.text_token match, :error unless match.empty?
encoder.end_group :string
state = :initial