summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Grzegorz Bizon <grzesiek.bizon@gmail.com> 2016-03-10 10:40:38 +0100
committer Grzegorz Bizon <grzesiek.bizon@gmail.com> 2016-03-11 10:16:05 +0100
commit fc290a7dcde969d956488612512d2fa4ab9d57c0 (patch)
tree c80c92ef96a3e14c22266785d8df239cfeb255f9
parent 6640dbd90f157090bef488ba4c97892c6dc391d9 (diff)
download gitlab-ce-fc290a7dcde969d956488612512d2fa4ab9d57c0.tar.gz
Validate GFM AST lexer process, add some specs for it
-rw-r--r-- lib/gitlab/gfm/ast/lexer.rb 31
-rw-r--r-- spec/lib/gitlab/gfm/ast/lexer_spec.rb 44
2 files changed, 66 insertions, 9 deletions
diff --git a/lib/gitlab/gfm/ast/lexer.rb b/lib/gitlab/gfm/ast/lexer.rb
index adec09b62fd..76929655ac7 100644
--- a/lib/gitlab/gfm/ast/lexer.rb
+++ b/lib/gitlab/gfm/ast/lexer.rb
@@ -2,6 +2,8 @@ module Gitlab
module Gfm
module Ast
class Lexer
+ class LexerError < StandardError; end
+
##
# GFM AST Lexer
#
@@ -18,31 +20,43 @@ module Gitlab
# We expect that all text is covered by lexemes.
#
def process!
+ process_nodes!
+ @nodes.each(&:process!)
+ @nodes.sort!
+ end
+
+ private
+
+ ##
+ # Processes lexeme nodes for each token in this lexer.
+ #
+ def process_nodes!
+ return if @tokens.empty?
+
@tokens.each do |token|
ranges_available.each do |range|
- process_range(token, range)
+ process_range!(range, token)
end
end
- # TODO, validate!
- @nodes.each(&:process!)
- @nodes.sort!
+ unless ranges_available.empty?
+ raise LexerError, 'Unprocessed nodes detected!'
+ end
end
- private
-
##
# Processes a given range.
#
# If pattern is found in a range, but this range is already covered
# by an existing node, we omit this one (flat search).
#
- def process_range(token, range)
+ def process_range!(range, token)
(@text[range]).scan(token.pattern).each do
match, offset = Regexp.last_match, range.begin
range = (match.begin(0) + offset)...(match.end(0) + offset)
next if ranges_taken.any? { |taken| taken.include?(range.begin) }
+
@nodes << token.new(match[0], range, match, @parent)
end
end
@@ -59,8 +73,9 @@ module Gitlab
taken.concat(node.range.to_a)
end
- text_indexes = (0..@text.length).to_a
+ text_indexes = (0..(@text.length - 1)).to_a
indexes_available = (text_indexes - indexes_taken).sort.uniq
+
indexes_available.inject([]) do |ranges, n|
if ranges.empty? || ranges.last.last != n - 1
ranges + [n..n]
diff --git a/spec/lib/gitlab/gfm/ast/lexer_spec.rb b/spec/lib/gitlab/gfm/ast/lexer_spec.rb
index 7a02f6a64a7..4394195695d 100644
--- a/spec/lib/gitlab/gfm/ast/lexer_spec.rb
+++ b/spec/lib/gitlab/gfm/ast/lexer_spec.rb
@@ -1,5 +1,47 @@
require 'spec_helper'
describe Gitlab::Gfm::Ast::Lexer do
- let(:parser) { described_class.new(text) }
+ let(:lexer) { described_class.new(text, tokens) }
+ let(:nodes) { lexer.process! }
+
+ context 'order of tokens' do
+ let(:tokens) do
+ [Gitlab::Gfm::Ast::Syntax::Text,
+ Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock]
+ end
+
+ let(:text) { "text and ```ruby\nblock\n```" }
+
+ it 'greedily matches tokens in order those are defined' do
+ expect(nodes.count).to eq 1
+ expect(nodes.first).to be_a Gitlab::Gfm::Ast::Syntax::Text
+ end
+ end
+
+ context 'uncovered ranges' do
+ let(:tokens) do
+ [Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock]
+ end
+
+ let(:text) { "text and ```ruby\nblock\n```" }
+
+ it 'raises error when uncovered ranges remain' do
+ expect { nodes }.to raise_error(Gitlab::Gfm::Ast::Lexer::LexerError,
+ /Unprocessed nodes detected/)
+ end
+ end
+
+ context 'intersecting tokens' do
+ let(:tokens) do
+ [Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock,
+ Gitlab::Gfm::Ast::Syntax::Text]
+ end
+
+ let(:text) { "```ruby\nsome text\n```" }
+
+ it 'does not match intersecting tokens' do
+ expect(nodes.count).to eq 1
+ expect(nodes.first.nodes.count).to eq 0
+ end
+ end
end