diff options
Diffstat (limited to 'lib/coderay')
-rw-r--r-- | lib/coderay/scanners/_map.rb | 3 | ||||
-rw-r--r-- | lib/coderay/scanners/java_script1.rb | 238 | ||||
-rw-r--r-- | lib/coderay/scanners/java_script2.rb | 240 | ||||
-rw-r--r-- | lib/coderay/scanners/java_script3.rb | 239 | ||||
-rw-r--r-- | lib/coderay/scanners/java_script4.rb | 152 |
5 files changed, 812 insertions, 60 deletions
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index 441ccc6..8fc505a 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -10,6 +10,9 @@ module Scanners :eruby => :erb, :irb => :ruby, :javascript => :java_script, + :javascript1 => :java_script1, + :javascript2 => :java_script2, + :javascript3 => :java_script3, :javascript4 => :java_script4, :js => :java_script, :pascal => :delphi, diff --git a/lib/coderay/scanners/java_script1.rb b/lib/coderay/scanners/java_script1.rb new file mode 100644 index 0000000..4fe59ba --- /dev/null +++ b/lib/coderay/scanners/java_script1.rb @@ -0,0 +1,238 @@ +# like java_script.rb +# - but uses instance instead of local variables for flags +module CodeRay +module Scanners + + # Scanner for JavaScript. + # + # Aliases: +ecmascript+, +ecma_script+, +javascript+ + class JavaScript1 < Scanner + + register_for :java_script1 + file_extension 'js' + + # The actual JavaScript keywords. + KEYWORDS = %w[ + break case catch continue default delete do else + finally for function if in instanceof new + return switch throw try typeof var void while with + ] # :nodoc: + PREDEFINED_CONSTANTS = %w[ + false null true undefined NaN Infinity + ] # :nodoc: + + MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4 + + KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[ + case delete in instanceof new return throw typeof with + ] # :nodoc: + + # Reserved for future use. + RESERVED_WORDS = %w[ + abstract boolean byte char class debugger double enum export extends + final float goto implements import int interface long native package + private protected public short static super synchronized throws transient + volatile + ] # :nodoc: + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_CONSTANTS, :predefined_constant). + add(MAGIC_VARIABLES, :local_variable). + add(KEYWORDS, :keyword) # :nodoc: + + ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc: + STRING_CONTENT_PATTERN = { + "'" => /[^\\']+/, + '"' => /[^\\"]+/, + '/' => /[^\\\/]+/, + } # :nodoc: + KEY_CHECK_PATTERN = { + "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx, + '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx, + } # :nodoc: + + protected + + def setup + @state = :initial + end + + def scan_tokens encoder, options + + state, @string_delimiter = options[:state] || @state + if @string_delimiter + encoder.begin_group state + end + + @value_expected = true + @key_expected = false + @function_expected = false + + until eos? + + case state + + when :initial + + if match = scan(/ \s+ | \\\n /x) + @value_expected = true if !@value_expected && match.index(?\n) + encoder.text_token match, :space + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx) + @value_expected = true + encoder.text_token match, :comment + state = :open_multi_line_comment if self[1] + + elsif check(/\.?\d/) + @key_expected = @value_expected = false + if match = scan(/0[xX][0-9A-Fa-f]+/) + encoder.text_token match, :hex + elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/) + encoder.text_token match, :octal + elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + encoder.text_token match, :float + elsif match = scan(/\d+/) + encoder.text_token match, :integer + end + + elsif @value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim) + # TODO: scan over nested tags + xml_scanner.tokenize match, :tokens => encoder + @value_expected = false + + elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x) + @value_expected = true + last_operator = match[-1] + @key_expected = (last_operator == ?{) || (last_operator == ?,) + @function_expected = false + encoder.text_token match, :operator + + elsif match = scan(/ [)\]}]+ /x) + @function_expected = @key_expected = @value_expected = false + encoder.text_token match, :operator + + elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x) + kind = IDENT_KIND[match] + @value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match] + # TODO: labels + if kind == :ident + if match.index(?$) # $ allowed inside an identifier + kind = :predefined + elsif @function_expected + kind = :function + elsif check(/\s*[=:]\s*function\b/) + kind = :function + elsif @key_expected && check(/\s*:/) + kind = :key + end + end + @function_expected = (kind == :keyword) && (match == 'function') + @key_expected = false + encoder.text_token match, kind + + elsif match = scan(/["']/) + if @key_expected && check(KEY_CHECK_PATTERN[match]) + state = :key + else + state = :string + end + encoder.begin_group state + @string_delimiter = match + encoder.text_token match, :delimiter + + elsif @value_expected && (match = scan(/\//)) + encoder.begin_group :regexp + state = :regexp + @string_delimiter = '/' + encoder.text_token match, :delimiter + + elsif match = scan(/ \/ /x) + @value_expected = true + @key_expected = false + encoder.text_token match, :operator + + else + encoder.text_token getch, :error + + end + + when :string, :regexp, :key + if match = scan(STRING_CONTENT_PATTERN[@string_delimiter]) + encoder.text_token match, :content + elsif match = scan(/["'\/]/) + encoder.text_token match, :delimiter + if state == :regexp + modifiers = scan(/[gim]+/) + encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty? + end + encoder.end_group state + @string_delimiter = nil + @key_expected = @value_expected = false + state = :initial + elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)) + if @string_delimiter == "'" && !(match == "\\\\" || match == "\\'") + encoder.text_token match, :content + else + encoder.text_token match, :char + end + elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/\\./m) + encoder.text_token match, :content + elsif match = scan(/ \\ | $ /x) + encoder.end_group state + encoder.text_token match, :error unless match.empty? + @string_delimiter = nil + @key_expected = @value_expected = false + state = :initial + else + raise_inspect "else case #{@string_delimiter} reached; %p not handled." % peek(1), encoder + end + + when :open_multi_line_comment + if match = scan(%r! .*? \*/ !mx) + state = :initial + else + match = scan(%r! .+ !mx) + end + @value_expected = true + encoder.text_token match, :comment if match + + else + #:nocov: + raise_inspect 'Unknown state: %p' % [state], encoder + #:nocov: + + end + + end + + if options[:keep_state] + @state = state, @string_delimiter + end + + if [:string, :regexp].include? state + encoder.end_group state + end + + encoder + end + + protected + + def reset_instance + super + @xml_scanner.reset if defined? @xml_scanner + end + + def xml_scanner + @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false + end + + end + +end +end diff --git a/lib/coderay/scanners/java_script2.rb b/lib/coderay/scanners/java_script2.rb new file mode 100644 index 0000000..42fa640 --- /dev/null +++ b/lib/coderay/scanners/java_script2.rb @@ -0,0 +1,240 @@ +# like java_script.rb +# - but uses instance instead of local variables for flags +# - but uses the same rule logic as java_script4.rb +# - also uses states array push/pop +module CodeRay +module Scanners + + # Scanner for JavaScript. + # + # Aliases: +ecmascript+, +ecma_script+, +javascript+ + class JavaScript2 < Scanner + + register_for :java_script2 + file_extension 'js' + + # The actual JavaScript keywords. + KEYWORDS = %w[ + break case catch continue default delete do else + finally for function if in instanceof new + return switch throw try typeof var void while with + ] # :nodoc: + PREDEFINED_CONSTANTS = %w[ + false null true undefined NaN Infinity + ] # :nodoc: + + MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4 + + KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[ + case delete in instanceof new return throw typeof with + ] # :nodoc: + + # Reserved for future use. + RESERVED_WORDS = %w[ + abstract boolean byte char class debugger double enum export extends + final float goto implements import int interface long native package + private protected public short static super synchronized throws transient + volatile + ] # :nodoc: + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_CONSTANTS, :predefined_constant). + add(MAGIC_VARIABLES, :local_variable). + add(KEYWORDS, :keyword) # :nodoc: + + ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc: + STRING_CONTENT_PATTERN = { + "'" => /[^\\']+/, + '"' => /[^\\"]+/, + '/' => /[^\\\/]+/, + } # :nodoc: + KEY_CHECK_PATTERN = { + "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx, + '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx, + } # :nodoc: + + protected + + def setup + @state = :initial + end + + def scan_tokens encoder, options + + state, @string_delimiter = options[:state] || @state + if @string_delimiter + encoder.begin_group state + end + + @value_expected = true + @key_expected = false + @function_expected = false + + states = [state] + + until eos? + + case state + + when :initial + + if match = scan(/ \s+ | \\\n /x) + encoder.text_token match, :space + @value_expected = true if !@value_expected && match.index(?\n) + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx) + encoder.text_token match, :comment + @value_expected = true + # state = :open_multi_line_comment if self[1] + + elsif check(/\.?\d/) + if match = scan(/0[xX][0-9A-Fa-f]+/) + encoder.text_token match, :hex + elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/) + encoder.text_token match, :octal + elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + encoder.text_token match, :float + elsif match = scan(/\d+/) + encoder.text_token match, :integer + end + @key_expected = @value_expected = false + + elsif @value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim) + # TODO: scan over nested tags + xml_scanner.tokenize match, :tokens => encoder + @value_expected = false + + elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x) + encoder.text_token match, :operator + @value_expected = true + @key_expected = /[{,]$/ === match + @function_expected = false + + elsif match = scan(/ [)\]}]+ /x) + encoder.text_token match, :operator + @function_expected = @key_expected = @value_expected = false + + elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x) + kind = IDENT_KIND[match] + @value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match] + # TODO: labels + if kind == :ident + if match.index(?$) # $ allowed inside an identifier + kind = :predefined + elsif @function_expected + kind = :function + elsif check(/\s*[=:]\s*function\b/) + kind = :function + elsif @key_expected && check(/\s*:/) + kind = :key + end + end + encoder.text_token match, kind + @function_expected = (kind == :keyword) && (match == 'function') + @key_expected = false + + elsif match = scan(/["']/) + state = (@key_expected && check(KEY_CHECK_PATTERN[match])) ? :key : :string + states << state + encoder.begin_group state + @string_delimiter = match + encoder.text_token match, :delimiter + + elsif @value_expected && (match = scan(/\//)) + state = :regexp + states << state + encoder.begin_group state + @string_delimiter = '/' + encoder.text_token match, :delimiter + + elsif match = scan(/ \/ /x) + @value_expected = true + @key_expected = false + encoder.text_token match, :operator + + else + encoder.text_token getch, :error + + end + + when :string, :regexp, :key + if match = scan(STRING_CONTENT_PATTERN[@string_delimiter]) + encoder.text_token match, :content + elsif match = scan(/["'\/]/) + encoder.text_token match, :delimiter + if match == '/' + modifiers = scan(/[gim]+/) + encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty? + end + @string_delimiter = nil + @key_expected = @value_expected = false + encoder.end_group states.pop + state = states.last + elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)) + if @string_delimiter == "'" && !(match == "\\\\" || match == "\\'") + encoder.text_token match, :content + else + encoder.text_token match, :char + end + elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/\\./m) + encoder.text_token match, :content + elsif match = scan(/ \\ | $ /x) + encoder.end_group states.pop + state = states.last + encoder.text_token match, :error unless match.empty? + @string_delimiter = nil + @key_expected = @value_expected = false + else + raise_inspect "else case #{@string_delimiter} reached; %p not handled." % peek(1), encoder + end + + # when :open_multi_line_comment + # if match = scan(%r! .*? \*/ !mx) + # states.pop + # state = states.last + # else + # match = scan(%r! .+ !mx) + # end + # @value_expected = true + # encoder.text_token match, :comment if match + + else + #:nocov: + raise_inspect 'Unknown state: %p' % [state], encoder + #:nocov: + + end + + end + + if options[:keep_state] + @state = state, @string_delimiter + end + + if [:string, :regexp].include? state + encoder.end_group state + end + + encoder + end + + protected + + def reset_instance + super + @xml_scanner.reset if defined? @xml_scanner + end + + def xml_scanner + @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false + end + + end + +end +end diff --git a/lib/coderay/scanners/java_script3.rb b/lib/coderay/scanners/java_script3.rb new file mode 100644 index 0000000..9492967 --- /dev/null +++ b/lib/coderay/scanners/java_script3.rb @@ -0,0 +1,239 @@ +# like java_script.rb +# - but uses the same rule logic as java_script4.rb +# - also uses states array push/pop +module CodeRay +module Scanners + + # Scanner for JavaScript. + # + # Aliases: +ecmascript+, +ecma_script+, +javascript+ + class JavaScript3 < Scanner + + register_for :java_script3 + file_extension 'js' + + # The actual JavaScript keywords. + KEYWORDS = %w[ + break case catch continue default delete do else + finally for function if in instanceof new + return switch throw try typeof var void while with + ] # :nodoc: + PREDEFINED_CONSTANTS = %w[ + false null true undefined NaN Infinity + ] # :nodoc: + + MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4 + + KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[ + case delete in instanceof new return throw typeof with + ] # :nodoc: + + # Reserved for future use. + RESERVED_WORDS = %w[ + abstract boolean byte char class debugger double enum export extends + final float goto implements import int interface long native package + private protected public short static super synchronized throws transient + volatile + ] # :nodoc: + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_CONSTANTS, :predefined_constant). + add(MAGIC_VARIABLES, :local_variable). + add(KEYWORDS, :keyword) # :nodoc: + + ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc: + STRING_CONTENT_PATTERN = { + "'" => /[^\\']+/, + '"' => /[^\\"]+/, + '/' => /[^\\\/]+/, + } # :nodoc: + KEY_CHECK_PATTERN = { + "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx, + '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx, + } # :nodoc: + + protected + + def setup + @state = :initial + end + + def scan_tokens encoder, options + + state, string_delimiter = options[:state] || @state + if string_delimiter + encoder.begin_group state + end + + value_expected = true + key_expected = false + function_expected = false + + states = [state] + + until eos? + + case state + + when :initial + + if match = scan(/ \s+ | \\\n /x) + encoder.text_token match, :space + value_expected = true if !value_expected && match.index(?\n) + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx) + encoder.text_token match, :comment + value_expected = true + # state = :open_multi_line_comment if self[1] + + elsif check(/\.?\d/) + if match = scan(/0[xX][0-9A-Fa-f]+/) + encoder.text_token match, :hex + elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/) + encoder.text_token match, :octal + elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + encoder.text_token match, :float + elsif match = scan(/\d+/) + encoder.text_token match, :integer + end + key_expected = value_expected = false + + elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim) + # TODO: scan over nested tags + xml_scanner.tokenize match, :tokens => encoder + value_expected = false + + elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x) + encoder.text_token match, :operator + value_expected = true + key_expected = /[{,]$/ === match + function_expected = false + + elsif match = scan(/ [)\]}]+ /x) + encoder.text_token match, :operator + function_expected = key_expected = value_expected = false + + elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x) + kind = IDENT_KIND[match] + value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match] + # TODO: labels + if kind == :ident + if match.index(?$) # $ allowed inside an identifier + kind = :predefined + elsif function_expected + kind = :function + elsif check(/\s*[=:]\s*function\b/) + kind = :function + elsif key_expected && check(/\s*:/) + kind = :key + end + end + encoder.text_token match, kind + function_expected = (kind == :keyword) && (match == 'function') + key_expected = false + + elsif match = scan(/["']/) + state = (key_expected && check(KEY_CHECK_PATTERN[match])) ? :key : :string + states << state + encoder.begin_group state + string_delimiter = match + encoder.text_token match, :delimiter + + elsif value_expected && (match = scan(/\//)) + state = :regexp + states << state + encoder.begin_group state + string_delimiter = '/' + encoder.text_token match, :delimiter + + elsif match = scan(/ \/ /x) + value_expected = true + key_expected = false + encoder.text_token match, :operator + + else + encoder.text_token getch, :error + + end + + when :string, :regexp, :key + if match = scan(STRING_CONTENT_PATTERN[string_delimiter]) + encoder.text_token match, :content + elsif match = scan(/["'\/]/) + encoder.text_token match, :delimiter + if match == '/' + modifiers = scan(/[gim]+/) + encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty? + end + string_delimiter = nil + key_expected = value_expected = false + encoder.end_group states.pop + state = states.last + elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)) + if string_delimiter == "'" && !(match == "\\\\" || match == "\\'") + encoder.text_token match, :content + else + encoder.text_token match, :char + end + elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/\\./m) + encoder.text_token match, :content + elsif match = scan(/ \\ | $ /x) + encoder.end_group states.pop + state = states.last + encoder.text_token match, :error unless match.empty? + string_delimiter = nil + key_expected = value_expected = false + else + raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder + end + + # when :open_multi_line_comment + # if match = scan(%r! .*? \*/ !mx) + # states.pop + # state = states.last + # else + # match = scan(%r! .+ !mx) + # end + # value_expected = true + # encoder.text_token match, :comment if match + + else + #:nocov: + raise_inspect 'Unknown state: %p' % [state], encoder + #:nocov: + + end + + end + + if options[:keep_state] + @state = state, string_delimiter + end + + if [:string, :regexp].include? state + encoder.end_group state + end + + encoder + end + + protected + + def reset_instance + super + @xml_scanner.reset if defined? @xml_scanner + end + + def xml_scanner + @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false + end + + end + +end +end diff --git a/lib/coderay/scanners/java_script4.rb b/lib/coderay/scanners/java_script4.rb index 10aa709..4b9601f 100644 --- a/lib/coderay/scanners/java_script4.rb +++ b/lib/coderay/scanners/java_script4.rb @@ -1,9 +1,10 @@ +# TODO: string_delimiter should be part of the state: push(:regexp, '/'), check_if -> (state, delimiter) { … } module CodeRay module Scanners class RuleBasedScanner5 < Scanner - CheckIf = Struct.new :callback + CheckIf = Struct.new :condition class << self attr_accessor :states @@ -22,7 +23,18 @@ module Scanners @@code << " \n" end - def token *pattern_and_actions + def on? pattern + pattern_expression = pattern.inspect + @@code << " #{'els' unless @@first}if check(#{pattern_expression})\n" + + @@first = true + yield + @@code << " end\n" + + @@first = false + end + + def on *pattern_and_actions if index = pattern_and_actions.find_index { |item| !item.is_a?(CheckIf) } preconditions = pattern_and_actions[0..index - 1] if index > 0 pattern = pattern_and_actions[index] or raise 'I need a pattern!' @@ -34,16 +46,23 @@ module Scanners for precondition in preconditions case precondition when CheckIf - callback = make_callback(precondition.callback) - case precondition.callback.arity - when 0 - arguments = '' - when 1 - arguments = '(state)' + case precondition.condition + when Proc + callback = make_callback(precondition.condition) + case precondition.condition.arity + when 0 + arguments = '' + when 1 + arguments = '(state)' + else + raise "I got %p arguments for precondition: %p, but I only know how to evaluate 0..1" % [precondition.condition.arity, callback] + end + precondition_expression << "#{callback}#{arguments} && " + when Symbol + precondition_expression << "#{precondition.condition} && " else - raise "I got %p arguments for precondition: %p, but I only know how to evaluate 0..1" % [precondition.callback.arity, callback] + raise "I don't know how to evaluate this check_if precondition: %p" % [precondition.condition] end - precondition_expression << "#{callback}#{arguments} && " else raise "I don't know how to evaluate this precondition: %p" % [precondition] end @@ -51,6 +70,8 @@ module Scanners end case pattern + # when String + # pattern_expression = pattern when Regexp pattern_expression = pattern.inspect when Proc @@ -127,8 +148,8 @@ module Scanners [:pop] end - def check_if &callback - CheckIf.new callback + def check_if value = nil, &callback + CheckIf.new value || callback end protected @@ -201,37 +222,35 @@ module Scanners } # :nodoc: state :initial do - token %r/ \s+ | \\\n /x, :space, -> (match) do - @value_expected = true if !@value_expected && match.index(?\n) - end + # on %r/ [ \t]* \n \s* /x, :space, -> { @value_expected = true } + # on %r/ [ \t]+ | \\\n /x, :space + on %r/ \s+ | \\\n /x, :space, -> (match) { @value_expected = true if !@value_expected && match.index(?\n) } - token %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx, :comment, -> (match) do - @value_expected = true + on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx, :comment, -> { @value_expected = true } # state = :open_multi_line_comment if self[1] - end - # elsif check(/\.?\d/) - token %r/0[xX][0-9A-Fa-f]+/, :hex, -> { @key_expected = @value_expected = false } - token %r/(?>0[0-7]+)(?![89.eEfF])/, :octal, -> { @key_expected = @value_expected = false } - token %r/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, -> { @key_expected = @value_expected = false } - token %r/\d+/, :integer, -> { @key_expected = @value_expected = false } + on? %r/\.?\d/ do + on %r/0[xX][0-9A-Fa-f]+/, :hex, -> { @key_expected = @value_expected = false } + on %r/(?>0[0-7]+)(?![89.eEfF])/, :octal, -> { @key_expected = @value_expected = false } + on %r/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, -> { @key_expected = @value_expected = false } + on %r/\d+/, :integer, -> { @key_expected = @value_expected = false } + end - token check_if { @value_expected }, %r/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim, -> (match, encoder) do + on check_if(:@value_expected), %r/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim, -> (match, encoder) do # TODO: scan over nested tags xml_scanner.tokenize match, :tokens => encoder @value_expected = false end - token %r/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x, :operator, -> (match) do + on %r/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x, :operator, -> (match) do @value_expected = true - last_operator = match[-1] - @key_expected = (last_operator == ?{) || (last_operator == ?,) + @key_expected = /[{,]$/ === match @function_expected = false end - token %r/ [)\]}]+ /x, :operator, -> { @function_expected = @key_expected = @value_expected = false } + on %r/ [)\]}]+ /x, :operator, -> { @function_expected = @key_expected = @value_expected = false } - token %r/ [$a-zA-Z_][A-Za-z_0-9$]* /x, -> (match, encoder) do + on %r/ [$a-zA-Z_][A-Za-z_0-9$]* /x, -> (match, encoder) do kind = IDENT_KIND[match] @value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match] # TODO: labels @@ -246,37 +265,47 @@ module Scanners kind = :key end end + encoder.text_token match, kind @function_expected = (kind == :keyword) && (match == 'function') @key_expected = false - encoder.text_token match, kind end - token %r/["']/, push { |match| + on %r/["']/, push { |match| + @string_delimiter = match @key_expected && check(KEY_CHECK_PATTERN[match]) ? :key : :string - }, :delimiter, -> (match) { @string_delimiter = match } + }, :delimiter - token check_if { @value_expected }, %r/\//, push(:regexp), :delimiter, -> { @string_delimiter = '/' } + on check_if(:@value_expected), %r/\//, push(:regexp), :delimiter, -> { @string_delimiter = '/' } - token %r/ \/ /x, :operator, -> { @value_expected = true; @key_expected = false } + on %r/ \/ /x, :operator, -> { @value_expected = true; @key_expected = false } end state :string, :regexp, :key do - token -> { STRING_CONTENT_PATTERN[@string_delimiter] }, :content + on -> { STRING_CONTENT_PATTERN[@string_delimiter] }, :content + # on 'STRING_CONTENT_PATTERN[@string_delimiter]', :content - token %r/\//, :delimiter, -> (match, encoder) do - modifiers = scan(/[gim]+/) - encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty? - end, -> do - @string_delimiter = nil - @key_expected = @value_expected = false - end, pop + # on %r/\//, :delimiter, -> (match, encoder) do + # modifiers = scan(/[gim]+/) + # encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty? + # @string_delimiter = nil + # @key_expected = @value_expected = false + # end, pop + # + # on %r/["']/, :delimiter, -> do + # @string_delimiter = nil + # @key_expected = @value_expected = false + # end, pop - token %r/["']/, :delimiter, -> do + on %r/["'\/]/, :delimiter, -> (match, encoder) do + if match == '/' + modifiers = scan(/[gim]+/) + encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty? + end @string_delimiter = nil @key_expected = @value_expected = false end, pop - token check_if { |state| state != :regexp }, %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox, -> (match, encoder) do + on check_if { |state| state != :regexp }, %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox, -> (match, encoder) do if @string_delimiter == "'" && !(match == "\\\\" || match == "\\'") encoder.text_token match, :content else @@ -284,26 +313,26 @@ module Scanners end end - token check_if { |state| state == :regexp }, %r/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox, :char - token %r/\\./m, :content - token %r/ \\ /x, pop, :error, -> (match, encoder) do + on check_if { |state| state == :regexp }, %r/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox, :char + on %r/\\./m, :content + on %r/ \\ /x, pop, :error, -> do @string_delimiter = nil @key_expected = @value_expected = false end end - state :open_multi_line_comment do - token %r! .*? \*/ !mx, :initial # don't consume! - token %r/ .+ /mx, :comment, -> { @value_expected = true } - - # if match = scan(%r! .*? \*/ !mx) - # state = :initial - # else - # match = scan(%r! .+ !mx) - # end - # value_expected = true - # encoder.text_token match, :comment if match - end + # state :open_multi_line_comment do + # on %r! .*? \*/ !mx, :initial # don't consume! + # on %r/ .+ /mx, :comment, -> { @value_expected = true } + # + # # if match = scan(%r! .*? \*/ !mx) + # # state = :initial + # # else + # # match = scan(%r! .+ !mx) + # # end + # # value_expected = true + # # encoder.text_token match, :comment if match + # end protected @@ -348,7 +377,10 @@ module Scanners end RUBY - # puts scan_tokens_code + if ENV['PUTS'] + puts scan_tokens_code + puts "callbacks: #{@callbacks.size}" + end class_eval scan_tokens_code, __FILE__, def_line protected |