Validate GFM AST lexer process, add some specs for it

author: Grzegorz Bizon <grzesiek.bizon@gmail.com> 2016-03-10 10:40:38 +0100
committer: Grzegorz Bizon <grzesiek.bizon@gmail.com> 2016-03-11 10:16:05 +0100
commit: fc290a7dcde969d956488612512d2fa4ab9d57c0 (patch)
tree: c80c92ef96a3e14c22266785d8df239cfeb255f9
parent: 6640dbd90f157090bef488ba4c97892c6dc391d9 (diff)
download: gitlab-ce-fc290a7dcde969d956488612512d2fa4ab9d57c0.tar.gz
2 files changed, 66 insertions, 9 deletions
diff --git a/lib/gitlab/gfm/ast/lexer.rb b/lib/gitlab/gfm/ast/lexer.rb
index adec09b62fd..76929655ac7 100644
--- a/lib/gitlab/gfm/ast/lexer.rb
+++ b/lib/gitlab/gfm/ast/lexer.rb
@@ -2,6 +2,8 @@ module Gitlab
   module Gfm
     module Ast
       class Lexer
+        class LexerError < StandardError; end
+
         ##
         # GFM AST Lexer
         #
@@ -18,31 +20,43 @@ module Gitlab
         # We expect that all text is covered by lexemes.
         #
         def process!
+          process_nodes!
+          @nodes.each(&:process!)
+          @nodes.sort!
+        end
+
+        private
+
+        ##
+        # Processes lexeme nodes for each token in this lexer.
+        #
+        def process_nodes!
+          return if @tokens.empty?
+
           @tokens.each do |token|
             ranges_available.each do |range|
-              process_range(token, range)
+              process_range!(range, token)
             end
           end
 
-          # TODO, validate!
-          @nodes.each(&:process!)
-          @nodes.sort!
+          unless ranges_available.empty?
+            raise LexerError, 'Unprocessed nodes detected!'
+          end
         end
 
-        private
-
         ##
         # Processes a given range.
         #
         # If pattern is found in a range, but this range is already covered
         # by an existing node, we omit this one (flat search).
         #
-        def process_range(token, range)
+        def process_range!(range, token)
           (@text[range]).scan(token.pattern).each do
             match, offset = Regexp.last_match, range.begin
             range = (match.begin(0) + offset)...(match.end(0) + offset)
 
             next if ranges_taken.any? { |taken| taken.include?(range.begin) }
+
             @nodes << token.new(match[0], range, match, @parent)
           end
         end
@@ -59,8 +73,9 @@ module Gitlab
             taken.concat(node.range.to_a)
           end
 
-          text_indexes = (0..@text.length).to_a
+          text_indexes = (0..(@text.length - 1)).to_a
           indexes_available = (text_indexes - indexes_taken).sort.uniq
+
           indexes_available.inject([]) do |ranges, n|
             if ranges.empty? || ranges.last.last != n - 1
               ranges + [n..n]
diff --git a/spec/lib/gitlab/gfm/ast/lexer_spec.rb b/spec/lib/gitlab/gfm/ast/lexer_spec.rb
index 7a02f6a64a7..4394195695d 100644
--- a/spec/lib/gitlab/gfm/ast/lexer_spec.rb
+++ b/spec/lib/gitlab/gfm/ast/lexer_spec.rb
@@ -1,5 +1,47 @@
 require 'spec_helper'
 
 describe Gitlab::Gfm::Ast::Lexer do
-  let(:parser) { described_class.new(text) }
+  let(:lexer) { described_class.new(text, tokens) }
+  let(:nodes) { lexer.process! }
+
+  context 'order of tokens' do
+    let(:tokens) do
+      [Gitlab::Gfm::Ast::Syntax::Text,
+       Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock]
+    end
+
+    let(:text) { "text and ```ruby\nblock\n```" }
+
+    it 'greedily matches tokens in order those are defined' do
+      expect(nodes.count).to eq 1
+      expect(nodes.first).to be_a Gitlab::Gfm::Ast::Syntax::Text
+    end
+  end
+
+  context 'uncovered ranges' do
+    let(:tokens) do
+      [Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock]
+    end
+
+    let(:text) { "text and ```ruby\nblock\n```" }
+
+    it 'raises error when uncovered ranges remain' do
+      expect { nodes }.to raise_error(Gitlab::Gfm::Ast::Lexer::LexerError,
+                                      /Unprocessed nodes detected/)
+    end
+  end
+
+  context 'intersecting tokens' do
+    let(:tokens) do
+      [Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock,
+       Gitlab::Gfm::Ast::Syntax::Text]
+    end
+
+    let(:text) { "```ruby\nsome text\n```" }
+
+    it 'does not match intersecting tokens' do
+      expect(nodes.count).to eq 1
+      expect(nodes.first.nodes.count).to eq 0
+    end
+  end
 end
author	Grzegorz Bizon <grzesiek.bizon@gmail.com>	2016-03-10 10:40:38 +0100
committer	Grzegorz Bizon <grzesiek.bizon@gmail.com>	2016-03-11 10:16:05 +0100
commit	fc290a7dcde969d956488612512d2fa4ab9d57c0 (patch)
tree	c80c92ef96a3e14c22266785d8df239cfeb255f9
parent	6640dbd90f157090bef488ba4c97892c6dc391d9 (diff)
download	gitlab-ce-fc290a7dcde969d956488612512d2fa4ab9d57c0.tar.gz