diff options
author | Grzegorz Bizon <grzesiek.bizon@gmail.com> | 2016-03-10 10:40:38 +0100 |
---|---|---|
committer | Grzegorz Bizon <grzesiek.bizon@gmail.com> | 2016-03-11 10:16:05 +0100 |
commit | fc290a7dcde969d956488612512d2fa4ab9d57c0 (patch) | |
tree | c80c92ef96a3e14c22266785d8df239cfeb255f9 | |
parent | 6640dbd90f157090bef488ba4c97892c6dc391d9 (diff) | |
download | gitlab-ce-fc290a7dcde969d956488612512d2fa4ab9d57c0.tar.gz |
Validate GFM AST lexer process, add some specs for it
-rw-r--r-- | lib/gitlab/gfm/ast/lexer.rb | 31 | ||||
-rw-r--r-- | spec/lib/gitlab/gfm/ast/lexer_spec.rb | 44 |
2 files changed, 66 insertions, 9 deletions
diff --git a/lib/gitlab/gfm/ast/lexer.rb b/lib/gitlab/gfm/ast/lexer.rb index adec09b62fd..76929655ac7 100644 --- a/lib/gitlab/gfm/ast/lexer.rb +++ b/lib/gitlab/gfm/ast/lexer.rb @@ -2,6 +2,8 @@ module Gitlab module Gfm module Ast class Lexer + class LexerError < StandardError; end + ## # GFM AST Lexer # @@ -18,31 +20,43 @@ module Gitlab # We expect that all text is covered by lexemes. # def process! + process_nodes! + @nodes.each(&:process!) + @nodes.sort! + end + + private + + ## + # Processes lexeme nodes for each token in this lexer. + # + def process_nodes! + return if @tokens.empty? + @tokens.each do |token| ranges_available.each do |range| - process_range(token, range) + process_range!(range, token) end end - # TODO, validate! - @nodes.each(&:process!) - @nodes.sort! + unless ranges_available.empty? + raise LexerError, 'Unprocessed nodes detected!' + end end - private - ## # Processes a given range. # # If pattern is found in a range, but this range is already covered # by an existing node, we ommit this one (flat search). # - def process_range(token, range) + def process_range!(range, token) (@text[range]).scan(token.pattern).each do match, offset = Regexp.last_match, range.begin range = (match.begin(0) + offset)...(match.end(0) + offset) next if ranges_taken.any? { |taken| taken.include?(range.begin) } + @nodes << token.new(match[0], range, match, @parent) end end @@ -59,8 +73,9 @@ module Gitlab taken.concat(node.range.to_a) end - text_indexes = (0..@text.length).to_a + text_indexes = (0..(@text.length - 1)).to_a indexes_available = (text_indexes - indexes_taken).sort.uniq + indexes_available.inject([]) do |ranges, n| if ranges.empty? || ranges.last.last != n - 1 ranges + [n..n] diff --git a/spec/lib/gitlab/gfm/ast/lexer_spec.rb b/spec/lib/gitlab/gfm/ast/lexer_spec.rb index 7a02f6a64a7..4394195695d 100644 --- a/spec/lib/gitlab/gfm/ast/lexer_spec.rb +++ b/spec/lib/gitlab/gfm/ast/lexer_spec.rb @@ -1,5 +1,47 @@ require 'spec_helper' describe Gitlab::Gfm::Ast::Lexer do - let(:parser) { described_class.new(text) } + let(:lexer) { described_class.new(text, tokens) } + let(:nodes) { lexer.process! } + + context 'order of tokens' do + let(:tokens) do + [Gitlab::Gfm::Ast::Syntax::Text, + Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock] + end + + let(:text) { "text and ```ruby\nblock\n```" } + + it 'greedily matches tokens in order those are defined' do + expect(nodes.count).to eq 1 + expect(nodes.first).to be_a Gitlab::Gfm::Ast::Syntax::Text + end + end + + context 'uncovered ranges' do + let(:tokens) do + [Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock] + end + + let(:text) { "text and ```ruby\nblock\n```" } + + it 'raises error when uncovered ranges remain' do + expect { nodes }.to raise_error(Gitlab::Gfm::Ast::Lexer::LexerError, + /Unprocessed nodes detected/) + end + end + + context 'intersecting tokens' do + let(:tokens) do + [Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock, + Gitlab::Gfm::Ast::Syntax::Text] + end + + let(:text) { "```ruby\nsome text\n```" } + + it 'does not match intersecting tokens' do + expect(nodes.count).to eq 1 + expect(nodes.first.nodes.count).to eq 0 + end + end end |