diff options
-rw-r--r-- | lib/coderay.rb | 1 | ||||
-rw-r--r-- | lib/coderay/scanners/_map.rb | 1 | ||||
-rw-r--r-- | lib/coderay/scanners/java_script6.rb | 169 | ||||
-rw-r--r-- | lib/coderay/single_state_rule_based_scanner.rb | 381 | ||||
-rw-r--r-- | rake_tasks/test.rake | 2 |
5 files changed, 553 insertions, 1 deletions
diff --git a/lib/coderay.rb b/lib/coderay.rb
index c1c9e34..e43f6bb 100644
--- a/lib/coderay.rb
+++ b/lib/coderay.rb
@@ -155,6 +155,7 @@ module CodeRay
 
   # DSL Scanner
   autoload :RuleBasedScanner, coderay_path('rule_based_scanner')
+  autoload :SingleStateRuleBasedScanner, coderay_path('single_state_rule_based_scanner')
   autoload :StateBasedScanner, coderay_path('state_based_scanner')
 
   # convenience access and reusable Encoder/Scanner pair
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb
index 61079d5..82fb17f 100644
--- a/lib/coderay/scanners/_map.rb
+++ b/lib/coderay/scanners/_map.rb
@@ -15,6 +15,7 @@ module Scanners
     :javascript3 => :java_script3,
     :javascript4 => :java_script4,
     :javascript5 => :java_script5,
+    :javascript6 => :java_script6,
     :js => :java_script,
     :pascal => :delphi,
     :patch => :diff,
diff --git a/lib/coderay/scanners/java_script6.rb b/lib/coderay/scanners/java_script6.rb
new file mode 100644
index 0000000..b745bd4
--- /dev/null
+++ b/lib/coderay/scanners/java_script6.rb
@@ -0,0 +1,169 @@
+# TODO: string_delimiter should be part of the state: push(:regexp, '/'), check_if -> (state, delimiter) { … }
+module CodeRay
+module Scanners
+
+  # Scanner for JavaScript, defined with the SingleStateRuleBasedScanner DSL.
+  #
+  # Alias: +javascript6+
+  class JavaScript6 < SingleStateRuleBasedScanner
+
+    register_for :java_script6
+    file_extension 'js'
+
+    # The actual JavaScript keywords.
+    KEYWORDS = %w[
+      break case catch continue default delete do else
+      finally for function if in instanceof new
+      return switch throw try typeof var void while with
+    ]  # :nodoc:
+    PREDEFINED_CONSTANTS = %w[
+      false null true undefined NaN Infinity
+    ]  # :nodoc:
+
+    MAGIC_VARIABLES = %w[ this arguments ]  # :nodoc: arguments was introduced in JavaScript 1.4
+
+    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+      case delete in instanceof new return throw typeof with
+    ]  # :nodoc:
+
+    # Reserved for future use.
+    RESERVED_WORDS = %w[
+      abstract boolean byte char class debugger double enum export extends
+      final float goto implements import int interface long native package
+      private protected public short static super synchronized throws transient
+      volatile
+    ]  # :nodoc:
+
+    IDENT_KIND = WordList.new(:ident).
+      add(RESERVED_WORDS, :reserved).
+      add(PREDEFINED_CONSTANTS, :predefined_constant).
+      add(MAGIC_VARIABLES, :local_variable).
+      add(KEYWORDS, :keyword)  # :nodoc:
+
+    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
+    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
+    REGEXP_ESCAPE = / [bBdDsSwW] /x  # :nodoc:
+    STRING_CONTENT_PATTERN = {
+      "'" => /[^\\']+/,
+      '"' => /[^\\"]+/,
+      '/' => /[^\\\/]+/,
+    }  # :nodoc:
+    KEY_CHECK_PATTERN = {
+      "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
+      '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
+    }  # :nodoc:
+
+    # Code outside of string, key, and regexp literals.
+    state :initial do
+      on %r/ \s+ | \\\n /x, :space, set(:value_expected) { |match, value_expected| value_expected || match.index(?\n) }
+      on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx, :comment, flag_off(:value_expected)
+      # state = :open_multi_line_comment if self[1]
+
+      on? %r/\.?\d/ do
+        on %r/0[xX][0-9A-Fa-f]+/, :hex, flag_off(:key_expected, :value_expected)
+        on %r/(?>0[0-7]+)(?![89.eEfF])/, :octal, flag_off(:key_expected, :value_expected)
+        on %r/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/, :float, flag_off(:key_expected, :value_expected)
+        on %r/\d+/, :integer, flag_off(:key_expected, :value_expected)
+      end
+
+      on check_if(:value_expected), %r/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim, -> (match, encoder) do
+        # TODO: scan over nested tags
+        xml_scanner.tokenize match, :tokens => encoder
+      end, flag_off(:value_expected)
+
+      on %r/ [-+*=<>?:;,!&^|(\[{~%]++ (?<![{,]) | \.+(?!\d) /x, :operator, flag_on(:value_expected), flag_off(:key_expected, :function_expected)
+      on %r/ [-+*=<>?:;,!&^|(\[{~%]*+ (?<=[{,]) /x, :operator, flag_on(:value_expected, :key_expected), flag_off(:function_expected)
+      on %r/ [)\]}]+ /x, :operator, flag_off(:function_expected, :key_expected, :value_expected)
+
+      on %r/ function (?![A-Za-z_0-9$]) /x, :keyword, flag_on(:function_expected), flag_off(:key_expected, :value_expected)
+      on %r/ [$a-zA-Z_][A-Za-z_0-9$]* /x, kind { |match, function_expected, key_expected|
+        kind = IDENT_KIND[match]
+        # TODO: labels
+        if kind == :ident
+          if match.index(?$)  # $ allowed inside an identifier
+            kind = :predefined
+          elsif function_expected
+            kind = :function
+          elsif check(/\s*[=:]\s*function\b/)
+            kind = :function
+          elsif key_expected && check(/\s*:/)
+            kind = :key
+          end
+        end
+
+        kind
+      }, flag_off(:function_expected, :key_expected), set(:value_expected) { |match| KEYWORDS_EXPECTING_VALUE[match] }
+
+      on %r/["']/, push { |match, key_expected| key_expected && check(KEY_CHECK_PATTERN[match]) ? :key : :string }, :delimiter, set(:string_delimiter) { |match| match }
+      on check_if(:value_expected), %r/\//, push(:regexp), :delimiter
+
+      on %r/\//, :operator, flag_on(:value_expected), flag_off(:key_expected)
+    end
+
+    # Inside a quoted string or object key, up to the closing quote.
+    state :string, :key do
+      on pattern { |string_delimiter| STRING_CONTENT_PATTERN[string_delimiter] }, :content
+      on %r/["']/, :delimiter, unset(:string_delimiter), flag_off(:key_expected, :value_expected), pop
+      on %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, kind { |match, string_delimiter|
+        string_delimiter == "'" && !(match == "\\\\" || match == "\\'") ? :content : :char
+      }
+      on %r/ \\. /mx, :content
+      on %r/ \\ /x, unset(:string_delimiter), flag_off(:key_expected, :value_expected), pop, :error
+    end
+
+    # Inside a regexp literal, up to the closing slash and its optional modifiers.
+    state :regexp do
+      on STRING_CONTENT_PATTERN['/'], :content
+      on %r/(\/)([gim]+)?/, groups(:delimiter, :modifier), flag_off(:key_expected, :value_expected), pop
+      on %r/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
+      on %r/\\./m, :content
+      on %r/ \\ /x, pop, :error, flag_off(:key_expected, :value_expected)
+    end
+
+    # state :open_multi_line_comment do
+    #   on %r! .*? \*/ !mx, :initial  # don't consume!
+    #   on %r/ .+ /mx, :comment, -> { value_expected = true }
+    #
+    #   # if match = scan(%r! .*? \*/ !mx)
+    #   #   state = :initial
+    #   # else
+    #   #   match = scan(%r! .+ !mx)
+    #   # end
+    #   # value_expected = true
+    #   # encoder.text_token match, :comment if match
+    # end
+
+    protected
+
+    # Initial values of the flags that the rules above read and write.
+    def setup
+      super
+
+      @string_delimiter = nil
+      @value_expected = true
+      @key_expected = false
+      @function_expected = false
+    end
+
+    # Ends the token group left open when scanning stops inside a string, key, or regexp.
+    def close_groups encoder, state
+      if [:string, :key, :regexp].include? state
+        encoder.end_group state
+      end
+    end
+
+    # Also resets the embedded XML scanner, if one has been created.
+    def reset_instance
+      super
+      @xml_scanner.reset if defined? @xml_scanner
+    end
+
+    # XML scanner for inline XML literals, created on first use with this scanner's tokens.
+    def xml_scanner
+      @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
+    end
+
+  end
+
+end
+end
diff --git a/lib/coderay/single_state_rule_based_scanner.rb b/lib/coderay/single_state_rule_based_scanner.rb
new file mode 100644
index 0000000..cd8d4a4
--- /dev/null
+++ b/lib/coderay/single_state_rule_based_scanner.rb
@@ -0,0 +1,381 @@
+require 'set'
+
+module CodeRay
+  module Scanners
+    # Base class for scanners that are described with a rule DSL (+state+, +on+, ...).
+    #
+    # The class-level DSL collects Ruby source in @code; the first call to the
+    # instance method +scan_tokens+ compiles that source into the real +scan_tokens+.
+    class SingleStateRuleBasedScanner < Scanner
+
+      # Value objects created by the DSL helpers below and interpreted by +on+.
+      Pattern = Struct.new :pattern
+      Groups = Struct.new :token_kinds
+      Kind = Struct.new :token_kind
+      Push = Struct.new :state, :group
+      Pop = Struct.new :group
+      PushState = Struct.new :state
+      PopState = Class.new
+      Check = Struct.new :condition
+      CheckIf = Class.new Check
+      CheckUnless = Class.new Check
+      ValueSetter = Struct.new :targets, :value
+      Increment = Struct.new :targets, :operation, :value
+      Continue = Class.new
+
+      class << self
+        attr_accessor :states
+
+        # Opens a +when+ branch for the given states; input no rule matches is emitted char by char as :error.
+        def state *names, &block
+          @code ||= ""
+
+          @code << "when #{names.map(&:inspect).join(', ')}\n"
+
+          @first = true
+          instance_eval(&block)
+          @code << " else\n"
+          @code << " puts \"no match for \#{state.inspect} => skip char\"\n" if $DEBUG
+          @code << " encoder.text_token getch, :error\n"
+          @code << " end\n"
+          @code << " \n"
+        end
+
+        # Wraps the rules defined in the block in an +if check(pattern)+ guard (a lookahead that consumes nothing).
+        def on? pattern
+          pattern_expression = pattern.inspect
+          @code << " #{'els' unless @first}if check(#{pattern_expression})\n"
+
+          @first = true
+          yield
+          @code << " end\n"
+
+          @first = false
+        end
+
+        # Adds one rule: leading conditions/patterns, followed by the actions to run when all of them hold.
+        def on *pattern_and_actions
+          # Leading Check/Regexp/Pattern items are the conditions; the first other item starts the actions.
+          index = pattern_and_actions.find_index { |item| !(item.is_a?(Check) || item.is_a?(Regexp) || item.is_a?(Pattern)) }
+          raise "invalid rule structure: #{pattern_and_actions.map(&:class)}" unless index
+          # index 0 means no condition at all; the slice 0..index - 1 would have returned every item.
+          raise 'I need conditions or a pattern!' if index.zero?
+          conditions = pattern_and_actions.first(index)
+          actions = pattern_and_actions.drop(index)
+
+          condition_expressions = []
+          if conditions
+            for condition in conditions
+              case condition
+              when CheckIf
+                case condition.condition
+                when Proc
+                  condition_expressions << "#{make_callback(condition.condition)}"
+                when Symbol
+                  condition_expressions << "#{condition.condition}"
+                else
+                  raise "I don't know how to evaluate this check_if condition: %p" % [condition.condition]
+                end
+              when CheckUnless
+                case condition.condition
+                when Proc
+                  condition_expressions << "!#{make_callback(condition.condition)}"
+                when Symbol
+                  condition_expressions << "!#{condition.condition}"
+                else
+                  raise "I don't know how to evaluate this check_unless condition: %p" % [condition.condition]
+                end
+              when Pattern
+                case condition.pattern
+                when Proc
+                  condition_expressions << "match = scan(#{make_callback(condition.pattern)})"
+                else
+                  raise "I don't know how to evaluate this pattern: %p" % [condition.pattern]
+                end
+              when Regexp
+                condition_expressions << "match = scan(#{condition.inspect})"
+              else
+                raise "I don't know how to evaluate this pattern/condition: %p" % [condition]
+              end
+            end
+          end
+
+          @code << " #{'els' unless @first}if #{condition_expressions.join(' && ')}\n"
+
+          for action in actions
+            case action
+            when String
+              raise "String actions are not supported: %p" % [action]
+
+            when Symbol
+              @code << " p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG
+              @code << " encoder.text_token match, #{action.inspect}\n"
+            when Kind
+              case action.token_kind
+              when Proc
+                @code << " encoder.text_token match, kind = #{make_callback(action.token_kind)}\n"
+              else
+                raise "I don't know how to evaluate this kind: %p" % [action.token_kind]
+              end
+            when Groups
+              @code << " p 'text_tokens %p in groups %p' % [match, #{action.token_kinds.inspect}]\n" if $DEBUG
+              action.token_kinds.each_with_index do |kind, i|
+                @code << " encoder.text_token self[#{i + 1}], #{kind.inspect} if self[#{i + 1}]\n"
+              end
+
+            when Push, PushState
+              case action.state
+              when String
+                raise "String push states are not supported: %p" % [action.state]
+              when Symbol
+                @code << " p 'push %p' % [#{action.state.inspect}]\n" if $DEBUG
+                @code << " state = #{action.state.inspect}\n"
+              when Proc
+                @code << " if new_state = #{make_callback(action.state)}\n"
+                @code << " state = new_state\n"
+                @code << " end\n"
+              else
+                raise "I don't know how to evaluate this push state: %p" % [action.state]
+              end
+              if action.is_a? Push
+                if action.state == action.group
+                  @code << " encoder.begin_group state\n"
+                else
+                  # Dispatch on the group, not the state: push(nil, :group) { ... } pairs a Proc state with a Symbol group.
+                  case action.group
+                  when Symbol
+                    @code << " p 'begin group %p' % [#{action.group.inspect}]\n" if $DEBUG
+                    @code << " encoder.begin_group #{action.group.inspect}\n"
+                  when Proc
+                    @code << " encoder.begin_group #{make_callback(action.group)}\n"
+                  else
+                    raise "I don't know how to evaluate this push group: %p" % [action.group]
+                  end
+                end
+              end
+            when Pop, PopState
+              @code << " p 'pop %p' % [state]\n" if $DEBUG
+              if action.is_a? Pop
+                if action.group
+                  case action.group
+                  when Symbol
+                    @code << " encoder.end_group #{action.group.inspect}\n"
+                  else
+                    raise "I don't know how to evaluate this pop group: %p" % [action.group]
+                  end
+                else
+                  @code << " encoder.end_group state\n"
+                end
+              end
+              @code << " state = :initial\n"
+
+            when ValueSetter
+              case action.value
+              when Proc
+                @code << " #{action.targets.join(' = ')} = #{make_callback(action.value)}\n"
+              when Symbol
+                @code << " #{action.targets.join(' = ')} = #{action.value}\n"
+              else
+                @code << " #{action.targets.join(' = ')} = #{action.value.inspect}\n"
+              end
+
+            when Increment
+              case action.value
+              when Proc
+                @code << " #{action.targets.join(' = ')} #{action.operation}= #{make_callback(action.value)}\n"
+              when Symbol
+                @code << " #{action.targets.join(' = ')} #{action.operation}= #{action.value}\n"
+              else
+                @code << " #{action.targets.join(' = ')} #{action.operation}= #{action.value.inspect}\n"
+              end
+
+            when Proc
+              @code << " #{make_callback(action)}\n"
+
+            when Continue
+              @code << " next\n"
+
+            else
+              raise "I don't know how to evaluate this action: %p" % [action]
+            end
+          end
+
+          @first = false
+        end
+
+        def groups *token_kinds
+          Groups.new token_kinds
+        end
+
+        def pattern pattern = nil, &block
+          Pattern.new pattern || block
+        end
+
+        def kind token_kind = nil, &block
+          Kind.new token_kind || block
+        end
+
+        def push state = nil, group = state, &block
+          raise 'push requires a state or a block; got nothing' unless state || block
+          Push.new state || block, group || block
+        end
+
+        def pop group = nil
+          Pop.new group
+        end
+
+        def push_state state = nil, &block
+          raise 'push_state requires a state or a block; got nothing' unless state || block
+          PushState.new state || block
+        end
+
+        def pop_state
+          PopState.new
+        end
+
+        def check_if value = nil, &callback
+          CheckIf.new value || callback
+        end
+
+        def check_unless value = nil, &callback
+          CheckUnless.new value || callback
+        end
+
+        def flag_on *flags
+          flags.each { |name| variables << name }
+          ValueSetter.new Array(flags), true
+        end
+
+        def flag_off *flags
+          flags.each { |name| variables << name }
+          ValueSetter.new Array(flags), false
+        end
+
+        def set flag, value = nil, &callback
+          variables << flag
+          ValueSetter.new [flag], value || callback || true
+        end
+
+        def unset *flags
+          flags.each { |name| variables << name }
+          ValueSetter.new Array(flags), nil
+        end
+
+        def increment *counters
+          counters.each { |name| variables << name }
+          Increment.new Array(counters), :+, 1
+        end
+
+        def decrement *counters
+          counters.each { |name| variables << name }
+          Increment.new Array(counters), :-, 1
+        end
+
+        def continue
+          Continue.new
+        end
+
+        # Compiles the collected rules into +scan_tokens+; with ENV['PUTS'] set, prints the generated code first.
+        def define_scan_tokens!
+          if ENV['PUTS']
+            puts CodeRay.scan(scan_tokens_code, :ruby).terminal
+            puts "callbacks: #{callbacks.size}"
+          end
+
+          class_eval scan_tokens_code
+        end
+
+        protected
+
+        def callbacks
+          @callbacks ||= {}
+        end
+
+        def variables
+          @variables ||= Set.new
+        end
+
+        def additional_variables
+          variables - %i(encoder options state match kind)
+        end
+
+        # Defines the block as an instance method and returns the source that calls it with its parameter names.
+        def make_callback block
+          base_name = "__callback_line_#{block.source_location.last}"
+          callback_name = base_name
+          counter = 'a'
+          while callbacks.key?(callback_name)
+            callback_name = "#{base_name}_#{counter}"
+            counter.succ!
+          end
+
+          callbacks[callback_name] = define_method(callback_name, &block)
+
+          parameters = block.parameters
+
+          if parameters.empty?
+            callback_name
+          else
+            parameter_names = parameters.map(&:last)
+            parameter_names.each { |name| variables << name }
+            "#{callback_name}(#{parameter_names.join(', ')})"
+          end
+        end
+
+        # Ruby source of the generated +scan_tokens+ method, built around the rules collected in @code.
+        def scan_tokens_code
+          <<-"RUBY"
+            def scan_tokens encoder, options
+              state = options[:state] || @state
+
+#{ restore_local_variables_code.chomp.gsub(/^/, ' ' * 3) }
+
+              until eos?
+                case state
+#{ @code.chomp.gsub(/^/, ' ' * 4) }
+                else
+                  raise_inspect 'Unknown state: %p' % [state], encoder
+                end
+              end
+
+              if options[:keep_state]
+                @state = state
+              end
+
+#{ close_groups_code.chomp.gsub(/^/, ' ' * 3) }
+
+              encoder
+            end
+          RUBY
+        end
+
+        def restore_local_variables_code
+          additional_variables.sort.map { |name| "#{name} = @#{name}" }.join("\n")
+        end
+
+        def close_groups_code
+          "close_groups(encoder, state)"
+        end
+      end
+
+      # Bootstrap: compiles the rules (defining scan_tokens on the scanner's class), then calls the compiled method.
+      def scan_tokens tokens, options
+        self.class.define_scan_tokens!
+
+        scan_tokens tokens, options
+      end
+
+      protected
+
+      # Starts in the :initial state.
+      def setup
+        @state = :initial
+      end
+
+      def close_groups encoder, state
+        # TODO: no-op by default; a scanner that opens token groups overrides this to end them.
+      end
+
+    end
+  end
+end
diff --git a/rake_tasks/test.rake b/rake_tasks/test.rake
index 58e6daa..6468790 100644
--- a/rake_tasks/test.rake
+++ b/rake_tasks/test.rake
@@ -48,7 +48,7 @@ Please rename or remove it and run again to use the GitHub repository:
       task lang => :update_scanner_suite do
         ruby "./test/scanners/suite.rb #{lang}"
       end
-      (1..5).each do |i|
+      (1..6).each do |i|
         task "#{lang}:#{i}" => :update_scanner_suite do
           ruby "./test/scanners/suite.rb #{lang}:#{i}"
         end