summaryrefslogtreecommitdiff
path: root/lib/coderay
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay')
-rw-r--r--lib/coderay/scanners/_map.rb3
-rw-r--r--lib/coderay/scanners/java_script1.rb238
-rw-r--r--lib/coderay/scanners/java_script2.rb240
-rw-r--r--lib/coderay/scanners/java_script3.rb239
-rw-r--r--lib/coderay/scanners/java_script4.rb152
5 files changed, 812 insertions, 60 deletions
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb
index 441ccc6..8fc505a 100644
--- a/lib/coderay/scanners/_map.rb
+++ b/lib/coderay/scanners/_map.rb
@@ -10,6 +10,9 @@ module Scanners
:eruby => :erb,
:irb => :ruby,
:javascript => :java_script,
+ :javascript1 => :java_script1,
+ :javascript2 => :java_script2,
+ :javascript3 => :java_script3,
:javascript4 => :java_script4,
:js => :java_script,
:pascal => :delphi,
diff --git a/lib/coderay/scanners/java_script1.rb b/lib/coderay/scanners/java_script1.rb
new file mode 100644
index 0000000..4fe59ba
--- /dev/null
+++ b/lib/coderay/scanners/java_script1.rb
@@ -0,0 +1,238 @@
+# like java_script.rb
+# - but uses instance instead of local variables for flags
+module CodeRay
+module Scanners
+
+ # Scanner for JavaScript; like the regular JavaScript scanner, but keeps its parser flags in instance variables.
+ #
+ # Alias: +javascript1+
+ class JavaScript1 < Scanner
+
+ register_for :java_script1
+ file_extension 'js'
+
+ # The actual JavaScript keywords.
+ KEYWORDS = %w[
+ break case catch continue default delete do else
+ finally for function if in instanceof new
+ return switch throw try typeof var void while with
+ ] # :nodoc:
+ PREDEFINED_CONSTANTS = %w[
+ false null true undefined NaN Infinity
+ ] # :nodoc:
+
+ MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4
+ # Keywords after which a value is expected; consulted when an identifier sets @value_expected.
+ KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+ case delete in instanceof new return throw typeof with
+ ] # :nodoc:
+
+ # Reserved for future use.
+ RESERVED_WORDS = %w[
+ abstract boolean byte char class debugger double enum export extends
+ final float goto implements import int interface long native package
+ private protected public short static super synchronized throws transient
+ volatile
+ ] # :nodoc:
+ # Maps a scanned word to its token kind; presumably :ident is returned for words not listed (confirm in WordList).
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(MAGIC_VARIABLES, :local_variable).
+ add(KEYWORDS, :keyword) # :nodoc:
+ # Patterns for what may follow a backslash inside strings and regexps.
+ ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+ REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
+ STRING_CONTENT_PATTERN = { # plain content (no backslash, no delimiter), keyed by the opening delimiter
+ "'" => /[^\\']+/,
+ '"' => /[^\\"]+/,
+ '/' => /[^\\\/]+/,
+ } # :nodoc:
+ KEY_CHECK_PATTERN = { # rest of a quoted string followed by a colon, keyed by the opening quote
+ "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
+ '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
+ } # :nodoc:
+
+ protected
+ # Sets the state that scan_tokens starts from when options[:state] is not given.
+ def setup
+ @state = :initial
+ end
+ # Tokenizes the input into encoder and returns encoder; honours options[:state] and options[:keep_state].
+ def scan_tokens encoder, options
+ # The start state is either a bare symbol or a [state, string_delimiter] pair (see the keep_state code below).
+ state, @string_delimiter = options[:state] || @state
+ if @string_delimiter # resuming inside a string/regexp/key: reopen its token group
+ encoder.begin_group state
+ end
+ # Flags for the context-sensitive rules: regexp vs. division, object keys, function names.
+ @value_expected = true
+ @key_expected = false
+ @function_expected = false
+
+ until eos?
+
+ case state
+
+ when :initial
+
+ if match = scan(/ \s+ | \\\n /x)
+ @value_expected = true if !@value_expected && match.index(?\n) # a line break allows a new value to start
+ encoder.text_token match, :space
+ # Line comment or block comment; capture group 1 is set only when the block comment has no closing */.
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx)
+ @value_expected = true
+ encoder.text_token match, :comment
+ state = :open_multi_line_comment if self[1]
+ # Numeric literals: hex, octal, float, integer (tried in this order).
+ elsif check(/\.?\d/)
+ @key_expected = @value_expected = false
+ if match = scan(/0[xX][0-9A-Fa-f]+/)
+ encoder.text_token match, :hex
+ elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
+ encoder.text_token match, :octal
+ elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ encoder.text_token match, :float
+ elsif match = scan(/\d+/)
+ encoder.text_token match, :integer
+ end
+ # Embedded XML element (only where a value may start); handed to the XML scanner.
+ elsif @value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
+ # TODO: scan over nested tags
+ xml_scanner.tokenize match, :tokens => encoder
+ @value_expected = false
+ # Operators and opening brackets: a value may follow; a key may follow when the run ends in "{" or ",".
+ elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
+ @value_expected = true
+ last_operator = match[-1]
+ @key_expected = (last_operator == ?{) || (last_operator == ?,)
+ @function_expected = false
+ encoder.text_token match, :operator
+ # Closing brackets: no value, key or function name is expected afterwards.
+ elsif match = scan(/ [)\]}]+ /x)
+ @function_expected = @key_expected = @value_expected = false
+ encoder.text_token match, :operator
+ # Words: classified via IDENT_KIND, then plain identifiers are refined by context.
+ elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
+ kind = IDENT_KIND[match]
+ @value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
+ # TODO: labels
+ if kind == :ident
+ if match.index(?$) # $ allowed inside an identifier
+ kind = :predefined
+ elsif @function_expected
+ kind = :function
+ elsif check(/\s*[=:]\s*function\b/)
+ kind = :function
+ elsif @key_expected && check(/\s*:/)
+ kind = :key
+ end
+ end
+ @function_expected = (kind == :keyword) && (match == 'function')
+ @key_expected = false
+ encoder.text_token match, kind
+ # Opening quote: a :key if a key is expected and the string is followed by a colon, otherwise a :string.
+ elsif match = scan(/["']/)
+ if @key_expected && check(KEY_CHECK_PATTERN[match])
+ state = :key
+ else
+ state = :string
+ end
+ encoder.begin_group state
+ @string_delimiter = match
+ encoder.text_token match, :delimiter
+ # A slash opens a regexp only where a value is expected ...
+ elsif @value_expected && (match = scan(/\//))
+ encoder.begin_group :regexp
+ state = :regexp
+ @string_delimiter = '/'
+ encoder.text_token match, :delimiter
+ # ... otherwise it is an operator.
+ elsif match = scan(/ \/ /x)
+ @value_expected = true
+ @key_expected = false
+ encoder.text_token match, :operator
+
+ else
+ encoder.text_token getch, :error
+
+ end
+ # Inside a string, regexp or quoted key; @string_delimiter selects the content pattern.
+ when :string, :regexp, :key
+ if match = scan(STRING_CONTENT_PATTERN[@string_delimiter])
+ encoder.text_token match, :content
+ elsif match = scan(/["'\/]/)
+ encoder.text_token match, :delimiter
+ if state == :regexp
+ modifiers = scan(/[gim]+/)
+ encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
+ end
+ encoder.end_group state
+ @string_delimiter = nil
+ @key_expected = @value_expected = false
+ state = :initial
+ elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
+ if @string_delimiter == "'" && !(match == "\\\\" || match == "\\'") # with ' quotes only \\ and \' become :char tokens
+ encoder.text_token match, :content
+ else
+ encoder.text_token match, :char
+ end
+ elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ | $ /x) # lone backslash or end of line: close the group and return to :initial
+ encoder.end_group state
+ encoder.text_token match, :error unless match.empty?
+ @string_delimiter = nil
+ @key_expected = @value_expected = false
+ state = :initial
+ else
+ raise_inspect "else case #{@string_delimiter} reached; %p not handled." % peek(1), encoder
+ end
+ # Rest of a block comment that was not closed yet: up to the closing */, or else everything that is left.
+ when :open_multi_line_comment
+ if match = scan(%r! .*? \*/ !mx)
+ state = :initial
+ else
+ match = scan(%r! .+ !mx)
+ end
+ @value_expected = true
+ encoder.text_token match, :comment if match
+
+ else
+ #:nocov:
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ #:nocov:
+
+ end
+
+ end
+ # Remember the state and delimiter so that a later call can resume from here.
+ if options[:keep_state]
+ @state = state, @string_delimiter
+ end
+ # Close a string or regexp group that is still open at the end of the input.
+ if [:string, :regexp].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+
+ protected
+ # Also resets the XML scanner, if one has been created.
+ def reset_instance
+ super
+ @xml_scanner.reset if defined? @xml_scanner
+ end
+ # Returns the XML scanner used for embedded XML, creating it on first use.
+ def xml_scanner
+ @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/java_script2.rb b/lib/coderay/scanners/java_script2.rb
new file mode 100644
index 0000000..42fa640
--- /dev/null
+++ b/lib/coderay/scanners/java_script2.rb
@@ -0,0 +1,240 @@
+# like java_script.rb
+# - but uses instance instead of local variables for flags
+# - but uses the same rule logic as java_script4.rb
+# - also uses states array push/pop
+module CodeRay
+module Scanners
+
+ # Scanner for JavaScript.
+ #
+ # Aliases: +ecmascript+, +ecma_script+, +javascript+
+ class JavaScript2 < Scanner
+
+ register_for :java_script2
+ file_extension 'js'
+
+ # The actual JavaScript keywords.
+ KEYWORDS = %w[
+ break case catch continue default delete do else
+ finally for function if in instanceof new
+ return switch throw try typeof var void while with
+ ] # :nodoc:
+ PREDEFINED_CONSTANTS = %w[
+ false null true undefined NaN Infinity
+ ] # :nodoc:
+
+ MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4
+
+ KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+ case delete in instanceof new return throw typeof with
+ ] # :nodoc:
+
+ # Reserved for future use.
+ RESERVED_WORDS = %w[
+ abstract boolean byte char class debugger double enum export extends
+ final float goto implements import int interface long native package
+ private protected public short static super synchronized throws transient
+ volatile
+ ] # :nodoc:
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(MAGIC_VARIABLES, :local_variable).
+ add(KEYWORDS, :keyword) # :nodoc:
+
+ ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+ REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
+ STRING_CONTENT_PATTERN = {
+ "'" => /[^\\']+/,
+ '"' => /[^\\"]+/,
+ '/' => /[^\\\/]+/,
+ } # :nodoc:
+ KEY_CHECK_PATTERN = {
+ "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
+ '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
+ } # :nodoc:
+
+ protected
+
+ def setup
+ @state = :initial
+ end
+
+ def scan_tokens encoder, options
+
+ state, @string_delimiter = options[:state] || @state
+ if @string_delimiter
+ encoder.begin_group state
+ end
+
+ @value_expected = true
+ @key_expected = false
+ @function_expected = false
+
+ states = [state]
+
+ until eos?
+
+ case state
+
+ when :initial
+
+ if match = scan(/ \s+ | \\\n /x)
+ encoder.text_token match, :space
+ @value_expected = true if !@value_expected && match.index(?\n)
+
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx)
+ encoder.text_token match, :comment
+ @value_expected = true
+ # state = :open_multi_line_comment if self[1]
+
+ elsif check(/\.?\d/)
+ if match = scan(/0[xX][0-9A-Fa-f]+/)
+ encoder.text_token match, :hex
+ elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
+ encoder.text_token match, :octal
+ elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ encoder.text_token match, :float
+ elsif match = scan(/\d+/)
+ encoder.text_token match, :integer
+ end
+ @key_expected = @value_expected = false
+
+ elsif @value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
+ # TODO: scan over nested tags
+ xml_scanner.tokenize match, :tokens => encoder
+ @value_expected = false
+
+ elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
+ encoder.text_token match, :operator
+ @value_expected = true
+ @key_expected = /[{,]$/ === match
+ @function_expected = false
+
+ elsif match = scan(/ [)\]}]+ /x)
+ encoder.text_token match, :operator
+ @function_expected = @key_expected = @value_expected = false
+
+ elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
+ kind = IDENT_KIND[match]
+ @value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
+ # TODO: labels
+ if kind == :ident
+ if match.index(?$) # $ allowed inside an identifier
+ kind = :predefined
+ elsif @function_expected
+ kind = :function
+ elsif check(/\s*[=:]\s*function\b/)
+ kind = :function
+ elsif @key_expected && check(/\s*:/)
+ kind = :key
+ end
+ end
+ encoder.text_token match, kind
+ @function_expected = (kind == :keyword) && (match == 'function')
+ @key_expected = false
+
+ elsif match = scan(/["']/)
+ state = (@key_expected && check(KEY_CHECK_PATTERN[match])) ? :key : :string
+ states << state
+ encoder.begin_group state
+ @string_delimiter = match
+ encoder.text_token match, :delimiter
+
+ elsif @value_expected && (match = scan(/\//))
+ state = :regexp
+ states << state
+ encoder.begin_group state
+ @string_delimiter = '/'
+ encoder.text_token match, :delimiter
+
+ elsif match = scan(/ \/ /x)
+ @value_expected = true
+ @key_expected = false
+ encoder.text_token match, :operator
+
+ else
+ encoder.text_token getch, :error
+
+ end
+
+ when :string, :regexp, :key
+ if match = scan(STRING_CONTENT_PATTERN[@string_delimiter])
+ encoder.text_token match, :content
+ elsif match = scan(/["'\/]/)
+ encoder.text_token match, :delimiter
+ if match == '/'
+ modifiers = scan(/[gim]+/)
+ encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
+ end
+ @string_delimiter = nil
+ @key_expected = @value_expected = false
+ encoder.end_group states.pop
+ state = states.last
+ elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
+ if @string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
+ encoder.text_token match, :content
+ else
+ encoder.text_token match, :char
+ end
+ elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ | $ /x)
+ encoder.end_group states.pop
+ state = states.last
+ encoder.text_token match, :error unless match.empty?
+ @string_delimiter = nil
+ @key_expected = @value_expected = false
+ else
+ raise_inspect "else case #{@string_delimiter} reached; %p not handled." % peek(1), encoder
+ end
+
+ # when :open_multi_line_comment
+ # if match = scan(%r! .*? \*/ !mx)
+ # states.pop
+ # state = states.last
+ # else
+ # match = scan(%r! .+ !mx)
+ # end
+ # @value_expected = true
+ # encoder.text_token match, :comment if match
+
+ else
+ #:nocov:
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ #:nocov:
+
+ end
+
+ end
+
+ if options[:keep_state]
+ @state = state, @string_delimiter
+ end
+
+ if [:string, :regexp].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+
+ protected
+
+ def reset_instance
+ super
+ @xml_scanner.reset if defined? @xml_scanner
+ end
+
+ def xml_scanner
+ @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/java_script3.rb b/lib/coderay/scanners/java_script3.rb
new file mode 100644
index 0000000..9492967
--- /dev/null
+++ b/lib/coderay/scanners/java_script3.rb
@@ -0,0 +1,239 @@
+# like java_script.rb
+# - but uses the same rule logic as java_script4.rb
+# - also uses states array push/pop
+module CodeRay
+module Scanners
+
+ # Scanner for JavaScript.
+ #
+ # Aliases: +ecmascript+, +ecma_script+, +javascript+
+ class JavaScript3 < Scanner
+
+ register_for :java_script3
+ file_extension 'js'
+
+ # The actual JavaScript keywords.
+ KEYWORDS = %w[
+ break case catch continue default delete do else
+ finally for function if in instanceof new
+ return switch throw try typeof var void while with
+ ] # :nodoc:
+ PREDEFINED_CONSTANTS = %w[
+ false null true undefined NaN Infinity
+ ] # :nodoc:
+
+ MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4
+
+ KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+ case delete in instanceof new return throw typeof with
+ ] # :nodoc:
+
+ # Reserved for future use.
+ RESERVED_WORDS = %w[
+ abstract boolean byte char class debugger double enum export extends
+ final float goto implements import int interface long native package
+ private protected public short static super synchronized throws transient
+ volatile
+ ] # :nodoc:
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(MAGIC_VARIABLES, :local_variable).
+ add(KEYWORDS, :keyword) # :nodoc:
+
+ ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+ REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
+ STRING_CONTENT_PATTERN = {
+ "'" => /[^\\']+/,
+ '"' => /[^\\"]+/,
+ '/' => /[^\\\/]+/,
+ } # :nodoc:
+ KEY_CHECK_PATTERN = {
+ "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
+ '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
+ } # :nodoc:
+
+ protected
+
+ def setup
+ @state = :initial
+ end
+
+ def scan_tokens encoder, options
+
+ state, string_delimiter = options[:state] || @state
+ if string_delimiter
+ encoder.begin_group state
+ end
+
+ value_expected = true
+ key_expected = false
+ function_expected = false
+
+ states = [state]
+
+ until eos?
+
+ case state
+
+ when :initial
+
+ if match = scan(/ \s+ | \\\n /x)
+ encoder.text_token match, :space
+ value_expected = true if !value_expected && match.index(?\n)
+
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx)
+ encoder.text_token match, :comment
+ value_expected = true
+ # state = :open_multi_line_comment if self[1]
+
+ elsif check(/\.?\d/)
+ if match = scan(/0[xX][0-9A-Fa-f]+/)
+ encoder.text_token match, :hex
+ elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
+ encoder.text_token match, :octal
+ elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ encoder.text_token match, :float
+ elsif match = scan(/\d+/)
+ encoder.text_token match, :integer
+ end
+ key_expected = value_expected = false
+
+ elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
+ # TODO: scan over nested tags
+ xml_scanner.tokenize match, :tokens => encoder
+ value_expected = false
+
+ elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
+ encoder.text_token match, :operator
+ value_expected = true
+ key_expected = /[{,]$/ === match
+ function_expected = false
+
+ elsif match = scan(/ [)\]}]+ /x)
+ encoder.text_token match, :operator
+ function_expected = key_expected = value_expected = false
+
+ elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
+ kind = IDENT_KIND[match]
+ value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
+ # TODO: labels
+ if kind == :ident
+ if match.index(?$) # $ allowed inside an identifier
+ kind = :predefined
+ elsif function_expected
+ kind = :function
+ elsif check(/\s*[=:]\s*function\b/)
+ kind = :function
+ elsif key_expected && check(/\s*:/)
+ kind = :key
+ end
+ end
+ encoder.text_token match, kind
+ function_expected = (kind == :keyword) && (match == 'function')
+ key_expected = false
+
+ elsif match = scan(/["']/)
+ state = (key_expected && check(KEY_CHECK_PATTERN[match])) ? :key : :string
+ states << state
+ encoder.begin_group state
+ string_delimiter = match
+ encoder.text_token match, :delimiter
+
+ elsif value_expected && (match = scan(/\//))
+ state = :regexp
+ states << state
+ encoder.begin_group state
+ string_delimiter = '/'
+ encoder.text_token match, :delimiter
+
+ elsif match = scan(/ \/ /x)
+ value_expected = true
+ key_expected = false
+ encoder.text_token match, :operator
+
+ else
+ encoder.text_token getch, :error
+
+ end
+
+ when :string, :regexp, :key
+ if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
+ encoder.text_token match, :content
+ elsif match = scan(/["'\/]/)
+ encoder.text_token match, :delimiter
+ if match == '/'
+ modifiers = scan(/[gim]+/)
+ encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
+ end
+ string_delimiter = nil
+ key_expected = value_expected = false
+ encoder.end_group states.pop
+ state = states.last
+ elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
+ if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
+ encoder.text_token match, :content
+ else
+ encoder.text_token match, :char
+ end
+ elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ encoder.text_token match, :char
+ elsif match = scan(/\\./m)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ | $ /x)
+ encoder.end_group states.pop
+ state = states.last
+ encoder.text_token match, :error unless match.empty?
+ string_delimiter = nil
+ key_expected = value_expected = false
+ else
+ raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder
+ end
+
+ # when :open_multi_line_comment
+ # if match = scan(%r! .*? \*/ !mx)
+ # states.pop
+ # state = states.last
+ # else
+ # match = scan(%r! .+ !mx)
+ # end
+ # value_expected = true
+ # encoder.text_token match, :comment if match
+
+ else
+ #:nocov:
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ #:nocov:
+
+ end
+
+ end
+
+ if options[:keep_state]
+ @state = state, string_delimiter
+ end
+
+ if [:string, :regexp].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+
+ protected
+
+ def reset_instance
+ super
+ @xml_scanner.reset if defined? @xml_scanner
+ end
+
+ def xml_scanner
+ @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/java_script4.rb b/lib/coderay/scanners/java_script4.rb
index 10aa709..4b9601f 100644
--- a/lib/coderay/scanners/java_script4.rb
+++ b/lib/coderay/scanners/java_script4.rb
@@ -1,9 +1,10 @@
+# TODO: string_delimiter should be part of the state: push(:regexp, '/'), check_if -> (state, delimiter) { … }
module CodeRay
module Scanners
class RuleBasedScanner5 < Scanner
- CheckIf = Struct.new :callback
+ CheckIf = Struct.new :condition
class << self
attr_accessor :states
@@ -22,7 +23,18 @@ module Scanners
@@code << " \n"
end
- def token *pattern_and_actions
+ def on? pattern # Appends an "if check(pattern) ... end" wrapper to @@code around whatever the given block generates.
+ pattern_expression = pattern.inspect
+ @@code << " #{'els' unless @@first}if check(#{pattern_expression})\n"
+ # Mark the chain as fresh for the block; presumably the first rule inside then emits "if" rather than "elsif" (confirm in `on`).
+ @@first = true
+ yield
+ @@code << " end\n"
+ # Whatever is generated next continues the outer chain, so it must not be treated as the first branch.
+ @@first = false
+ end
+
+ def on *pattern_and_actions
if index = pattern_and_actions.find_index { |item| !item.is_a?(CheckIf) }
preconditions = pattern_and_actions[0..index - 1] if index > 0
pattern = pattern_and_actions[index] or raise 'I need a pattern!'
@@ -34,16 +46,23 @@ module Scanners
for precondition in preconditions
case precondition
when CheckIf
- callback = make_callback(precondition.callback)
- case precondition.callback.arity
- when 0
- arguments = ''
- when 1
- arguments = '(state)'
+ case precondition.condition
+ when Proc
+ callback = make_callback(precondition.condition)
+ case precondition.condition.arity
+ when 0
+ arguments = ''
+ when 1
+ arguments = '(state)'
+ else
+ raise "I got %p arguments for precondition: %p, but I only know how to evaluate 0..1" % [precondition.condition.arity, callback]
+ end
+ precondition_expression << "#{callback}#{arguments} && "
+ when Symbol
+ precondition_expression << "#{precondition.condition} && "
else
- raise "I got %p arguments for precondition: %p, but I only know how to evaluate 0..1" % [precondition.callback.arity, callback]
+ raise "I don't know how to evaluate this check_if precondition: %p" % [precondition.condition]
end
- precondition_expression << "#{callback}#{arguments} && "
else
raise "I don't know how to evaluate this precondition: %p" % [precondition]
end
@@ -51,6 +70,8 @@ module Scanners
end
case pattern
+ # when String
+ # pattern_expression = pattern
when Regexp
pattern_expression = pattern.inspect
when Proc
@@ -127,8 +148,8 @@ module Scanners
[:pop]
end
- def check_if &callback
- CheckIf.new callback
+ def check_if value = nil, &callback # Wraps a precondition in CheckIf: the given value if truthy, otherwise the block.
+ CheckIf.new value || callback
+ end
protected
@@ -201,37 +222,35 @@ module Scanners
} # :nodoc:
state :initial do
- token %r/ \s+ | \\\n /x, :space, -> (match) do
- @value_expected = true if !@value_expected && match.index(?\n)
- end
+ # on %r/ [ \t]* \n \s* /x, :space, -> { @value_expected = true }
+ # on %r/ [ \t]+ | \\\n /x, :space
+ on %r/ \s+ | \\\n /x, :space, -> (match) { @value_expected = true if !@value_expected && match.index(?\n) }
- token %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx, :comment, -> (match) do
- @value_expected = true
+ on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx, :comment, -> { @value_expected = true }
# state = :open_multi_line_comment if self[1]
- end
- # elsif check(/\.?\d/)
- token %r/0[xX][0-9A-Fa-f]+/, :hex, -> { @key_expected = @value_expected = false }
- token %r/(?>0[0-7]+)(?![89.eEfF])/, :octal, -> { @key_expected = @value_expected = false }
- token %r/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, -> { @key_expected = @value_expected = false }
- token %r/\d+/, :integer, -> { @key_expected = @value_expected = false }
+ on? %r/\.?\d/ do
+ on %r/0[xX][0-9A-Fa-f]+/, :hex, -> { @key_expected = @value_expected = false }
+ on %r/(?>0[0-7]+)(?![89.eEfF])/, :octal, -> { @key_expected = @value_expected = false }
+ on %r/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, -> { @key_expected = @value_expected = false }
+ on %r/\d+/, :integer, -> { @key_expected = @value_expected = false }
+ end
- token check_if { @value_expected }, %r/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim, -> (match, encoder) do
+ on check_if(:@value_expected), %r/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim, -> (match, encoder) do
# TODO: scan over nested tags
xml_scanner.tokenize match, :tokens => encoder
@value_expected = false
end
- token %r/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x, :operator, -> (match) do
+ on %r/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x, :operator, -> (match) do
@value_expected = true
- last_operator = match[-1]
- @key_expected = (last_operator == ?{) || (last_operator == ?,)
+ @key_expected = /[{,]$/ === match
@function_expected = false
end
- token %r/ [)\]}]+ /x, :operator, -> { @function_expected = @key_expected = @value_expected = false }
+ on %r/ [)\]}]+ /x, :operator, -> { @function_expected = @key_expected = @value_expected = false }
- token %r/ [$a-zA-Z_][A-Za-z_0-9$]* /x, -> (match, encoder) do
+ on %r/ [$a-zA-Z_][A-Za-z_0-9$]* /x, -> (match, encoder) do
kind = IDENT_KIND[match]
@value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
# TODO: labels
@@ -246,37 +265,47 @@ module Scanners
kind = :key
end
end
+ encoder.text_token match, kind
@function_expected = (kind == :keyword) && (match == 'function')
@key_expected = false
- encoder.text_token match, kind
end
- token %r/["']/, push { |match|
+ on %r/["']/, push { |match|
+ @string_delimiter = match
@key_expected && check(KEY_CHECK_PATTERN[match]) ? :key : :string
- }, :delimiter, -> (match) { @string_delimiter = match }
+ }, :delimiter
- token check_if { @value_expected }, %r/\//, push(:regexp), :delimiter, -> { @string_delimiter = '/' }
+ on check_if(:@value_expected), %r/\//, push(:regexp), :delimiter, -> { @string_delimiter = '/' }
- token %r/ \/ /x, :operator, -> { @value_expected = true; @key_expected = false }
+ on %r/ \/ /x, :operator, -> { @value_expected = true; @key_expected = false }
end
state :string, :regexp, :key do
- token -> { STRING_CONTENT_PATTERN[@string_delimiter] }, :content
+ on -> { STRING_CONTENT_PATTERN[@string_delimiter] }, :content
+ # on 'STRING_CONTENT_PATTERN[@string_delimiter]', :content
- token %r/\//, :delimiter, -> (match, encoder) do
- modifiers = scan(/[gim]+/)
- encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
- end, -> do
- @string_delimiter = nil
- @key_expected = @value_expected = false
- end, pop
+ # on %r/\//, :delimiter, -> (match, encoder) do
+ # modifiers = scan(/[gim]+/)
+ # encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
+ # @string_delimiter = nil
+ # @key_expected = @value_expected = false
+ # end, pop
+ #
+ # on %r/["']/, :delimiter, -> do
+ # @string_delimiter = nil
+ # @key_expected = @value_expected = false
+ # end, pop
- token %r/["']/, :delimiter, -> do
+ on %r/["'\/]/, :delimiter, -> (match, encoder) do
+ if match == '/'
+ modifiers = scan(/[gim]+/)
+ encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
+ end
@string_delimiter = nil
@key_expected = @value_expected = false
end, pop
- token check_if { |state| state != :regexp }, %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox, -> (match, encoder) do
+ on check_if { |state| state != :regexp }, %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox, -> (match, encoder) do
if @string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
encoder.text_token match, :content
else
@@ -284,26 +313,26 @@ module Scanners
end
end
- token check_if { |state| state == :regexp }, %r/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox, :char
- token %r/\\./m, :content
- token %r/ \\ /x, pop, :error, -> (match, encoder) do
+ on check_if { |state| state == :regexp }, %r/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox, :char
+ on %r/\\./m, :content
+ on %r/ \\ /x, pop, :error, -> do
@string_delimiter = nil
@key_expected = @value_expected = false
end
end
- state :open_multi_line_comment do
- token %r! .*? \*/ !mx, :initial # don't consume!
- token %r/ .+ /mx, :comment, -> { @value_expected = true }
-
- # if match = scan(%r! .*? \*/ !mx)
- # state = :initial
- # else
- # match = scan(%r! .+ !mx)
- # end
- # value_expected = true
- # encoder.text_token match, :comment if match
- end
+ # state :open_multi_line_comment do
+ # on %r! .*? \*/ !mx, :initial # don't consume!
+ # on %r/ .+ /mx, :comment, -> { @value_expected = true }
+ #
+ # # if match = scan(%r! .*? \*/ !mx)
+ # # state = :initial
+ # # else
+ # # match = scan(%r! .+ !mx)
+ # # end
+ # # value_expected = true
+ # # encoder.text_token match, :comment if match
+ # end
protected
@@ -348,7 +377,10 @@ module Scanners
end
RUBY
- # puts scan_tokens_code
+ if ENV['PUTS']
+ puts scan_tokens_code
+ puts "callbacks: #{@callbacks.size}"
+ end
class_eval scan_tokens_code, __FILE__, def_line
protected