starting with SimpleScanner

author: Kornelius Kalnbach <murphy@rubychan.de> 2017-04-09 18:38:53 +0200
committer: Kornelius Kalnbach <murphy@rubychan.de> 2017-04-09 18:38:53 +0200
commit: 1bdaeef6c43436e4984f5b96cb17618f82832225 (patch)
tree: 5189302c0749660870d079898ce090e41b92f8fb
parent: 7a02cdded08dd232319eae17998abc877efb58cb (diff)
download: coderay-1bdaeef6c43436e4984f5b96cb17618f82832225.tar.gz
4 files changed, 545 insertions, 0 deletions
diff --git a/lib/coderay/simple_scanner.rb b/lib/coderay/simple_scanner.rb
new file mode 100644
index 0000000..6873f88
--- /dev/null
+++ b/lib/coderay/simple_scanner.rb
@@ -0,0 +1,40 @@
+require 'set'
+
+module CodeRay
+  module Scanners
+    class SimpleScanner < Scanner
+      extend SimpleScannerDSL
+
+      class << self
+        def define_scan_tokens!
+          if ENV['PUTS']
+            puts CodeRay.scan(scan_tokens_code, :ruby).terminal
+            puts "callbacks: #{callbacks.size}"
+          end
+
+          class_eval <<-RUBY
+def scan_tokens encoder, options
+#{ scan_tokens_code.chomp.gsub(/^/, '  ' * 2) }
+end
+          RUBY
+        end
+      end
+
+      def scan_tokens tokens, options
+        self.class.define_scan_tokens!
+
+        scan_tokens tokens, options
+      end
+
+      protected
+
+      def setup
+        @state = :initial
+      end
+
+      def close_groups encoder, states
+        # TODO
+      end
+    end
+  end
+end
+\ No newline at end of file
diff --git a/lib/coderay/simple_scanner_dsl.rb b/lib/coderay/simple_scanner_dsl.rb
new file mode 100644
index 0000000..b3c8c57
--- /dev/null
+++ b/lib/coderay/simple_scanner_dsl.rb
@@ -0,0 +1,381 @@
+require 'set'
+
+module CodeRay
+  module Scanners
+    module SimpleScannerDSL
+      Pattern = Struct.new :pattern
+      Groups = Struct.new :token_kinds
+      Kind = Struct.new :token_kind
+      Push = Struct.new :state, :group
+      Pop = Struct.new :group
+      PushState = Struct.new :state
+      PopState = Class.new
+      Check = Struct.new :condition
+      CheckIf = Class.new Check
+      CheckUnless = Class.new Check
+      ValueSetter = Struct.new :targets, :value
+      Increment = Struct.new :targets, :operation, :value
+      Continue = Class.new
+
+      State = Struct.new :names, :block, :dsl do
+        def initialize(*)
+          super
+          eval
+        end
+
+        def eval
+          @first = true
+
+          @code = ""
+          instance_eval(&block)
+        end
+
+        def code
+          <<-RUBY
+when #{names.map(&:inspect).join(', ')}
+#{ rules_code.chomp.gsub(/^/, '  ') }
+  else
+#{ handle_unexpected_char_code.chomp.gsub(/^/, '  ' * 2) }
+  end
+          RUBY
+        end
+
+        protected
+
+        def rules_code
+          @code
+        end
+
+        def handle_unexpected_char_code
+          ''.tap do |code|
+            code << 'puts "no match for #{state.inspect} => skip char"' << "\n" if $DEBUG
+            code << 'encoder.text_token getch, :error'
+          end
+        end
+
+        public
+
+        def on? pattern
+          pattern_expression = pattern.inspect
+          @code << "#{'els' unless @first}if check(#{pattern_expression})\n"
+
+          @first = true
+          yield
+          @code << "end\n"
+
+          @first = false
+        end
+
+        def on *pattern_and_actions
+          if index = pattern_and_actions.find_index { |item| !(item.is_a?(Check) || item.is_a?(Regexp) || item.is_a?(Pattern)) }
+            conditions = pattern_and_actions[0..index - 1] or raise 'I need conditions or a pattern!'
+            actions    = pattern_and_actions[index..-1]    or raise 'I need actions!'
+          else
+            raise "invalid rule structure: #{pattern_and_actions.map(&:class)}"
+          end
+
+          condition_expressions = []
+          if conditions
+            for condition in conditions
+              case condition
+              when CheckIf
+                case condition.condition
+                when Proc
+                  condition_expressions << "#{dsl.add_callback(condition.condition)}"
+                when Symbol
+                  condition_expressions << "#{condition.condition}"
+                else
+                  raise "I don't know how to evaluate this check_if condition: %p" % [condition.condition]
+                end
+              when CheckUnless
+                case condition.condition
+                when Proc
+                  condition_expressions << "!#{dsl.add_callback(condition.condition)}"
+                when Symbol
+                  condition_expressions << "!#{condition.condition}"
+                else
+                  raise "I don't know how to evaluate this check_unless condition: %p" % [condition.condition]
+                end
+              when Pattern
+                case condition.pattern
+                when Proc
+                  condition_expressions << "match = scan(#{dsl.add_callback(condition.pattern)})"
+                else
+                  raise "I don't know how to evaluate this pattern: %p" % [condition.pattern]
+                end
+              when Regexp
+                condition_expressions << "match = scan(#{condition.inspect})"
+              else
+                raise "I don't know how to evaluate this pattern/condition: %p" % [condition]
+              end
+            end
+          end
+
+          @code << "#{'els' unless @first}if #{condition_expressions.join(' && ')}\n"
+
+          for action in actions
+            case action
+            when String
+              raise
+              @code << "p 'evaluate #{action.inspect}'\n" if $DEBUG
+              @code << "#{action}\n"
+
+            when Symbol
+              @code << "p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG
+              @code << "encoder.text_token match, #{action.inspect}\n"
+            when Kind
+              case action.token_kind
+              when Proc
+                @code << "encoder.text_token match, kind = #{dsl.add_callback(action.token_kind)}\n"
+              else
+                raise "I don't know how to evaluate this kind: %p" % [action.token_kind]
+              end
+            when Groups
+              @code << "p 'text_tokens %p in groups %p' % [match, #{action.token_kinds.inspect}]\n" if $DEBUG
+              action.token_kinds.each_with_index do |kind, i|
+                @code << "encoder.text_token self[#{i + 1}], #{kind.inspect} if self[#{i + 1}]\n"
+              end
+
+            when Push, PushState
+              case action.state
+              when String
+                raise
+                @code << "p 'push %p' % [#{action.state}]\n" if $DEBUG
+                @code << "state = #{action.state}\n"
+                @code << "states << state\n"
+              when Symbol
+                @code << "p 'push %p' % [#{action.state.inspect}]\n" if $DEBUG
+                @code << "state = #{action.state.inspect}\n"
+                @code << "states << state\n"
+              when Proc
+                @code << "if new_state = #{dsl.add_callback(action.state)}\n"
+                @code << "  state = new_state\n"
+                @code << "  states << new_state\n"
+                @code << "end\n"
+              else
+                raise "I don't know how to evaluate this push state: %p" % [action.state]
+              end
+              if action.is_a? Push
+                if action.state == action.group
+                  @code << "encoder.begin_group state\n"
+                else
+                  case action.state
+                  when Symbol
+                    @code << "p 'begin group %p' % [#{action.group.inspect}]\n" if $DEBUG
+                    @code << "encoder.begin_group #{action.group.inspect}\n"
+                  when Proc
+                    @code << "encoder.begin_group #{dsl.add_callback(action.group)}\n"
+                  else
+                    raise "I don't know how to evaluate this push state: %p" % [action.state]
+                  end
+                end
+              end
+            when Pop, PopState
+              @code << "p 'pop %p' % [states.last]\n" if $DEBUG
+              if action.is_a? Pop
+                if action.group
+                  case action.group
+                  when Symbol
+                    @code << "encoder.end_group #{action.group.inspect}\n"
+                  else
+                    raise "I don't know how to evaluate this pop group: %p" % [action.group]
+                  end
+                  @code << "states.pop\n"
+                else
+                  @code << "encoder.end_group states.pop\n"
+                end
+              else
+                @code << "states.pop\n"
+              end
+              @code << "state = states.last\n"
+
+            when ValueSetter
+              case action.value
+              when Proc
+                @code << "#{action.targets.join(' = ')} = #{dsl.add_callback(action.value)}\n"
+              when Symbol
+                @code << "#{action.targets.join(' = ')} = #{action.value}\n"
+              else
+                @code << "#{action.targets.join(' = ')} = #{action.value.inspect}\n"
+              end
+
+            when Increment
+              case action.value
+              when Proc
+                @code << "#{action.targets.join(' = ')} #{action.operation}= #{dsl.add_callback(action.value)}\n"
+              when Symbol
+                @code << "#{action.targets.join(' = ')} #{action.operation}= #{action.value}\n"
+              else
+                @code << "#{action.targets.join(' = ')} #{action.operation}= #{action.value.inspect}\n"
+              end
+
+            when Proc
+              @code << "#{dsl.add_callback(action)}\n"
+
+            when Continue
+              @code << "next\n"
+
+            else
+              raise "I don't know how to evaluate this action: %p" % [action]
+            end
+          end
+
+          @first = false
+        end
+
+        def groups *token_kinds
+          Groups.new token_kinds
+        end
+
+        def pattern pattern = nil, &block
+          Pattern.new pattern || block
+        end
+
+        def kind token_kind = nil, &block
+          Kind.new token_kind || block
+        end
+
+        def push state = nil, group = state, &block
+          raise 'push requires a state or a block; got nothing' unless state || block
+          Push.new state || block, group || block
+        end
+
+        def pop group = nil
+          Pop.new group
+        end
+
+        def push_state state = nil, &block
+          raise 'push_state requires a state or a block; got nothing' unless state || block
+          PushState.new state || block
+        end
+
+        def pop_state
+          PopState.new
+        end
+
+        def check_if value = nil, &callback
+          CheckIf.new value || callback
+        end
+
+        def check_unless value = nil, &callback
+          CheckUnless.new value || callback
+        end
+
+        def flag_on *flags
+          flags.each { |name| dsl.add_variable name }
+          ValueSetter.new Array(flags), true
+        end
+
+        def flag_off *flags
+          flags.each { |name| dsl.add_variable name }
+          ValueSetter.new Array(flags), false
+        end
+
+        def set flag, value = nil, &callback
+          dsl.add_variable flag
+          ValueSetter.new [flag], value || callback
+        end
+
+        def unset *flags
+          flags.each { |name| dsl.add_variable name }
+          ValueSetter.new Array(flags), nil
+        end
+
+        def increment *counters
+          counters.each { |name| dsl.add_variable name }
+          Increment.new Array(counters), :+, 1
+        end
+
+        def decrement *counters
+          counters.each { |name| dsl.add_variable name }
+          Increment.new Array(counters), :-, 1
+        end
+
+        def continue
+          Continue.new
+        end
+      end
+
+      attr_accessor :states
+
+      def state *names, &block
+        @states ||= []
+        @states << State.new(names, block, self)
+      end
+
+      def add_callback block
+        base_name = "__callback_line_#{block.source_location.last}"
+        callback_name = base_name
+        counter = 'a'
+        while callbacks.key?(callback_name)
+          callback_name = "#{base_name}_#{counter}"
+          counter.succ!
+        end
+
+        callbacks[callback_name] = define_method(callback_name, &block)
+
+        parameters = block.parameters
+
+        if parameters.empty?
+          callback_name
+        else
+          parameter_names = parameters.map(&:last)
+          parameter_names.each { |name| variables << name }
+          "#{callback_name}(#{parameter_names.join(', ')})"
+        end
+      end
+
+      def add_variable name
+        variables << name
+      end
+
+      protected
+
+      def callbacks
+        @callbacks ||= {}
+      end
+
+      def variables
+        @variables ||= Set.new
+      end
+
+      def additional_variables
+        variables - %i(encoder options state states match kind)
+      end
+
+      def scan_tokens_code
+        <<-"RUBY"
+state = options[:state] || @state
+states = [state]
+#{ restore_local_variables_code.chomp }
+
+until eos?
+  case state
+#{ states_code.chomp.gsub(/^/, '  ') }
+  else
+    raise_inspect 'Unknown state: %p' % [state], encoder
+  end
+end
+
+@state = state if options[:keep_state]
+
+#{ close_groups_code.chomp }
+
+encoder
+        RUBY
+      end
+
+      def restore_local_variables_code
+        additional_variables.sort.map { |name| "#{name} = @#{name}" }.join("\n")
+      end
+
+      def states_code
+        @states.map(&:code)[0,1].join
+      end
+
+      def close_groups_code
+        'close_groups(encoder, states)'
+      end
+    end
+  end
+end
+\ No newline at end of file
diff --git a/spec/simple_scanner_spec.rb b/spec/simple_scanner_spec.rb
new file mode 100644
index 0000000..088343c
--- /dev/null
+++ b/spec/simple_scanner_spec.rb
@@ -0,0 +1,28 @@
+RSpec.describe CodeRay::Scanners::SimpleScanner do
+  let(:scanner) { Class.new described_class }
+
+  describe '#scan_tokens_code' do
+    subject { scanner.send :scan_tokens_code }
+    it 'lets you define states' do
+      is_expected.to eq <<-RUBY
+state = options[:state] || @state
+states = [state]
+
+
+until eos?
+  case state
+    
+  else
+    raise_inspect 'Unknown state: %p' % [state], encoder
+  end
+end
+
+@state = state if options[:keep_state]
+
+close_groups(encoder, states)
+
+encoder
+      RUBY
+    end
+  end
+end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
new file mode 100644
index 0000000..49b6a0e
--- /dev/null
+++ b/spec/spec_helper.rb
@@ -0,0 +1,96 @@
+# This file was generated by the `rspec --init` command. Conventionally, all
+# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
+# The generated `.rspec` file contains `--require spec_helper` which will cause
+# this file to always be loaded, without a need to explicitly require it in any
+# files.
+#
+# Given that it is always loaded, you are encouraged to keep this file as
+# light-weight as possible. Requiring heavyweight dependencies from this file
+# will add to the boot time of your test suite on EVERY test run, even for an
+# individual file that may not need all of that loaded. Instead, consider making
+# a separate helper file that requires the additional dependencies and performs
+# the additional setup, and require it from the spec files that actually need
+# it.
+#
+# The `.rspec` file also contains a few flags that are not defaults but that
+# users commonly want.
+#
+# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
+RSpec.configure do |config|
+  # rspec-expectations config goes here. You can use an alternate
+  # assertion/expectation library such as wrong or the stdlib/minitest
+  # assertions if you prefer.
+  config.expect_with :rspec do |expectations|
+    # This option will default to `true` in RSpec 4. It makes the `description`
+    # and `failure_message` of custom matchers include text for helper methods
+    # defined using `chain`, e.g.:
+    #     be_bigger_than(2).and_smaller_than(4).description
+    #     # => "be bigger than 2 and smaller than 4"
+    # ...rather than:
+    #     # => "be bigger than 2"
+    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
+  end
+
+  # rspec-mocks config goes here. You can use an alternate test double
+  # library (such as bogus or mocha) by changing the `mock_with` option here.
+  config.mock_with :rspec do |mocks|
+    # Prevents you from mocking or stubbing a method that does not exist on
+    # a real object. This is generally recommended, and will default to
+    # `true` in RSpec 4.
+    mocks.verify_partial_doubles = true
+  end
+
+  # These two settings work together to allow you to limit a spec run
+  # to individual examples or groups you care about by tagging them with
+  # `:focus` metadata. When nothing is tagged with `:focus`, all examples
+  # get run.
+  config.filter_run :focus
+  config.run_all_when_everything_filtered = true
+
+  # Allows RSpec to persist some state between runs in order to support
+  # the `--only-failures` and `--next-failure` CLI options. We recommend
+  # you configure your source control system to ignore this file.
+  config.example_status_persistence_file_path = "spec/examples.txt"
+
+  # Limits the available syntax to the non-monkey patched syntax that is
+  # recommended. For more details, see:
+  #   - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
+  #   - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
+  #   - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
+  config.disable_monkey_patching!
+
+  # This setting enables warnings. It's recommended, but in some cases may
+  # be too noisy due to issues in dependencies.
+  config.warnings = true
+
+  # Many RSpec users commonly either run the entire suite or an individual
+  # file, and it's useful to allow more verbose output when running an
+  # individual spec file.
+  if config.files_to_run.one?
+    # Use the documentation formatter for detailed output,
+    # unless a formatter has already been configured
+    # (e.g. via a command-line flag).
+    config.default_formatter = 'doc'
+  end
+
+  # Print the 10 slowest examples and example groups at the
+  # end of the spec run, to help surface which specs are running
+  # particularly slow.
+  config.profile_examples = 10
+
+  # Run specs in random order to surface order dependencies. If you find an
+  # order dependency and want to debug it, you can fix the order by providing
+  # the seed, which is printed after each run.
+  #     --seed 1234
+  config.order = :random
+
+  # Seed global randomization in this process using the `--seed` CLI option.
+  # Setting this allows you to use `--seed` to deterministically reproduce
+  # test failures related to randomization by passing the same `--seed` value
+  # as the one that triggered the failure.
+  Kernel.srand config.seed
+end
+
+$LOAD_PATH << 'lib/coderay'
+
+require 'coderay'
author	Kornelius Kalnbach <murphy@rubychan.de>	2017-04-09 18:38:53 +0200
committer	Kornelius Kalnbach <murphy@rubychan.de>	2017-04-09 18:38:53 +0200
commit	1bdaeef6c43436e4984f5b96cb17618f82832225 (patch)
tree	5189302c0749660870d079898ce090e41b92f8fb
parent	7a02cdded08dd232319eae17998abc877efb58cb (diff)
download	coderay-1bdaeef6c43436e4984f5b96cb17618f82832225.tar.gz