summaryrefslogtreecommitdiff
path: root/lib/banzai
diff options
context:
space:
mode:
Diffstat (limited to 'lib/banzai')
-rw-r--r--lib/banzai/filter/abstract_reference_filter.rb15
-rw-r--r--lib/banzai/filter/autolink_filter.rb86
-rw-r--r--lib/banzai/filter/commit_reference_filter.rb3
-rw-r--r--lib/banzai/filter/markdown_engines/common_mark.rb45
-rw-r--r--lib/banzai/filter/markdown_engines/redcarpet.rb32
-rw-r--r--lib/banzai/filter/markdown_filter.rb41
-rw-r--r--lib/banzai/filter/syntax_highlight_filter.rb1
-rw-r--r--lib/banzai/redactor.rb25
-rw-r--r--lib/banzai/renderer/common_mark/html.rb21
-rw-r--r--lib/banzai/renderer/html.rb13
-rw-r--r--lib/banzai/renderer/redcarpet/html.rb15
11 files changed, 205 insertions, 92 deletions
diff --git a/lib/banzai/filter/abstract_reference_filter.rb b/lib/banzai/filter/abstract_reference_filter.rb
index e7e6a90b5fd..c9e3f8ce42b 100644
--- a/lib/banzai/filter/abstract_reference_filter.rb
+++ b/lib/banzai/filter/abstract_reference_filter.rb
@@ -174,7 +174,9 @@ module Banzai
title = object_link_title(object)
klass = reference_class(object_sym)
- data = data_attributes_for(link_content || match, parent, object, link: !!link_content)
+ data = data_attributes_for(link_content || match, parent, object,
+ link_content: !!link_content,
+ link_reference: link_reference)
url =
if matches.names.include?("url") && matches[:url]
@@ -194,12 +196,13 @@ module Banzai
end
end
- def data_attributes_for(text, project, object, link: false)
+ def data_attributes_for(text, project, object, link_content: false, link_reference: false)
data_attribute(
- original: text,
- link: link,
- project: project.id,
- object_sym => object.id
+ original: text,
+ link: link_content,
+ link_reference: link_reference,
+ project: project.id,
+ object_sym => object.id
)
end
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb
index b8d2673c1a6..75b64ae9af2 100644
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -25,8 +25,8 @@ module Banzai
# period or comma for punctuation without those characters being included
# in the generated link.
#
- # Rubular: http://rubular.com/r/cxjPyZc7Sb
- LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)}
+ # Rubular: http://rubular.com/r/JzPhi6DCZp
+ LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)}
# Text matching LINK_PATTERN inside these elements will not be linked
IGNORE_PARENTS = %w(a code kbd pre script style).to_set
@@ -35,53 +35,19 @@ module Banzai
TEXT_QUERY = %Q(descendant-or-self::text()[
not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')})
and contains(., '://')
- and not(starts-with(., 'http'))
- and not(starts-with(., 'ftp'))
]).freeze
+ PUNCTUATION_PAIRS = {
+ "'" => "'",
+ '"' => '"',
+ ')' => '(',
+ ']' => '[',
+ '}' => '{'
+ }.freeze
+
def call
return doc if context[:autolink] == false
- rinku_parse
- text_parse
- end
-
- private
-
- # Run the text through Rinku as a first pass
- #
- # This will quickly autolink http(s) and ftp links.
- #
- # `@doc` will be re-parsed with the HTML String from Rinku.
- def rinku_parse
- # Convert the options from a Hash to a String that Rinku expects
- options = tag_options(link_options)
-
- # NOTE: We don't parse email links because it will erroneously match
- # external Commit and CommitRange references.
- #
- # The final argument tells Rinku to link short URLs that don't include a
- # period (e.g., http://localhost:3000/)
- rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1)
-
- return if rinku == html
-
- # Rinku returns a String, so parse it back to a Nokogiri::XML::Document
- # for further processing.
- @doc = parse_html(rinku)
- end
-
- # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
- def contains_unsafe?(scheme)
- return false unless scheme
-
- scheme = scheme.strip.downcase
- Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
- end
-
- # Autolinks any text matching LINK_PATTERN that Rinku didn't already
- # replace
- def text_parse
doc.xpath(TEXT_QUERY).each do |node|
content = node.to_html
@@ -97,6 +63,16 @@ module Banzai
doc
end
+ private
+
+ # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
+ def contains_unsafe?(scheme)
+ return false unless scheme
+
+ scheme = scheme.strip.downcase
+ Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
+ end
+
def autolink_match(match)
# start by stripping out dangerous links
begin
@@ -112,12 +88,30 @@ module Banzai
match.gsub!(/((?:&[\w#]+;)+)\z/, '')
dropped = ($1 || '').html_safe
+ # To match the behaviour of Rinku, if the matched link ends with a
+ # closing part of a matched pair of punctuation, we remove that trailing
+ # character unless there are an equal number of closing and opening
+ # characters in the link.
+ if match.end_with?(*PUNCTUATION_PAIRS.keys)
+ close_character = match[-1]
+ close_count = match.count(close_character)
+ open_character = PUNCTUATION_PAIRS[close_character]
+ open_count = match.count(open_character)
+
+ if open_count != close_count || open_character == close_character
+ dropped += close_character
+ match = match[0..-2]
+ end
+ end
+
options = link_options.merge(href: match)
- content_tag(:a, match, options) + dropped
+ content_tag(:a, match.html_safe, options) + dropped
end
def autolink_filter(text)
- text.gsub(LINK_PATTERN) { |match| autolink_match(match) }
+ Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:|
+ autolink_match(link)
+ end
end
def link_options
diff --git a/lib/banzai/filter/commit_reference_filter.rb b/lib/banzai/filter/commit_reference_filter.rb
index eedb95197aa..43bf4fc6565 100644
--- a/lib/banzai/filter/commit_reference_filter.rb
+++ b/lib/banzai/filter/commit_reference_filter.rb
@@ -18,7 +18,8 @@ module Banzai
def find_object(project, id)
if project && project.valid_repo?
- project.commit(id)
+ # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/43894
+ Gitlab::GitalyClient.allow_n_plus_1_calls { project.commit(id) }
end
end
diff --git a/lib/banzai/filter/markdown_engines/common_mark.rb b/lib/banzai/filter/markdown_engines/common_mark.rb
new file mode 100644
index 00000000000..bc9597df894
--- /dev/null
+++ b/lib/banzai/filter/markdown_engines/common_mark.rb
@@ -0,0 +1,45 @@
+# `CommonMark` markdown engine for GitLab's Banzai markdown filter.
+# This module is used in Banzai::Filter::MarkdownFilter.
+# Used gem is `commonmarker` which is a ruby wrapper for libcmark (CommonMark parser)
+# including GitHub's GFM extensions.
+# Homepage: https://github.com/gjtorikian/commonmarker
+
+module Banzai
+ module Filter
+ module MarkdownEngines
+ class CommonMark
+ EXTENSIONS = [
+ :autolink, # provides support for automatically converting URLs to anchor tags.
+ :strikethrough, # provides support for strikethroughs.
+ :table, # provides support for tables.
+ :tagfilter # strips out several "unsafe" HTML tags from being used: https://github.github.com/gfm/#disallowed-raw-html-extension-
+ ].freeze
+
+ PARSE_OPTIONS = [
+ :FOOTNOTES, # parse footnotes.
+ :STRIKETHROUGH_DOUBLE_TILDE, # parse strikethroughs by double tildes (as redcarpet does).
+ :VALIDATE_UTF8 # replace illegal sequences with the replacement character U+FFFD.
+ ].freeze
+
+ # The `:GITHUB_PRE_LANG` option is not used intentionally because
+ # it renders a fence block with language as `<pre lang="LANG"><code>some code\n</code></pre>`
+ # while GitLab's syntax is `<pre><code lang="LANG">some code\n</code></pre>`.
+ # If in the future the syntax is about to be made GitHub-compatible, please, add `:GITHUB_PRE_LANG` render option below
+ # and remove `code_block` method from `lib/banzai/renderer/common_mark/html.rb`.
+ RENDER_OPTIONS = [
+ :DEFAULT # default rendering system. Nothing special.
+ ].freeze
+
+ def initialize
+ @renderer = Banzai::Renderer::CommonMark::HTML.new(options: RENDER_OPTIONS)
+ end
+
+ def render(text)
+ doc = CommonMarker.render_doc(text, PARSE_OPTIONS, EXTENSIONS)
+
+ @renderer.render(doc)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/banzai/filter/markdown_engines/redcarpet.rb b/lib/banzai/filter/markdown_engines/redcarpet.rb
new file mode 100644
index 00000000000..ac99941fefa
--- /dev/null
+++ b/lib/banzai/filter/markdown_engines/redcarpet.rb
@@ -0,0 +1,32 @@
+# `Redcarpet` markdown engine for GitLab's Banzai markdown filter.
+# This module is used in Banzai::Filter::MarkdownFilter.
+# Used gem is `redcarpet` which is a ruby library for markdown processing.
+# Homepage: https://github.com/vmg/redcarpet
+
+module Banzai
+ module Filter
+ module MarkdownEngines
+ class Redcarpet
+ OPTIONS = {
+ fenced_code_blocks: true,
+ footnotes: true,
+ lax_spacing: true,
+ no_intra_emphasis: true,
+ space_after_headers: true,
+ strikethrough: true,
+ superscript: true,
+ tables: true
+ }.freeze
+
+ def initialize
+ html_renderer = Banzai::Renderer::Redcarpet::HTML.new
+ @renderer = ::Redcarpet::Markdown.new(html_renderer, OPTIONS)
+ end
+
+ def render(text)
+ @renderer.render(text)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/banzai/filter/markdown_filter.rb b/lib/banzai/filter/markdown_filter.rb
index 9cac303e645..c1e2b680240 100644
--- a/lib/banzai/filter/markdown_filter.rb
+++ b/lib/banzai/filter/markdown_filter.rb
@@ -1,34 +1,31 @@
module Banzai
module Filter
class MarkdownFilter < HTML::Pipeline::TextFilter
- # https://github.com/vmg/redcarpet#and-its-like-really-simple-to-use
- REDCARPET_OPTIONS = {
- fenced_code_blocks: true,
- footnotes: true,
- lax_spacing: true,
- no_intra_emphasis: true,
- space_after_headers: true,
- strikethrough: true,
- superscript: true,
- tables: true
- }.freeze
-
def initialize(text, context = nil, result = nil)
- super text, context, result
- @text = @text.delete "\r"
+ super(text, context, result)
+
+ @renderer = renderer(context[:markdown_engine]).new
+ @text = @text.delete("\r")
end
def call
- html = self.class.renderer.render(@text)
- html.rstrip!
- html
+ @renderer.render(@text).rstrip
+ end
+
+ private
+
+ DEFAULT_ENGINE = :redcarpet
+
+ def engine(engine_from_context)
+ engine_from_context ||= DEFAULT_ENGINE
+
+ engine_from_context.to_s.classify
end
- def self.renderer
- Thread.current[:banzai_markdown_renderer] ||= begin
- renderer = Banzai::Renderer::HTML.new
- Redcarpet::Markdown.new(renderer, REDCARPET_OPTIONS)
- end
+ def renderer(engine_from_context)
+ "Banzai::Filter::MarkdownEngines::#{engine(engine_from_context)}".constantize
+ rescue NameError
+ raise NameError, "`#{engine_from_context}` is unknown markdown engine"
end
end
end
diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb
index 0ac7e231b5b..6dbf0d68fe8 100644
--- a/lib/banzai/filter/syntax_highlight_filter.rb
+++ b/lib/banzai/filter/syntax_highlight_filter.rb
@@ -1,3 +1,4 @@
+require 'rouge/plugins/common_mark'
require 'rouge/plugins/redcarpet'
module Banzai
diff --git a/lib/banzai/redactor.rb b/lib/banzai/redactor.rb
index 827df7c08ae..fd457bebf03 100644
--- a/lib/banzai/redactor.rb
+++ b/lib/banzai/redactor.rb
@@ -42,16 +42,33 @@ module Banzai
next if visible.include?(node)
doc_data[:visible_reference_count] -= 1
- # The reference should be replaced by the original link's content,
- # which is not always the same as the rendered one.
- content = node.attr('data-original') || node.inner_html
- node.replace(content)
+ redacted_content = redacted_node_content(node)
+ node.replace(redacted_content)
end
end
metadata
end
+ # Return redacted content of given node as either the original link (<a> tag),
+ # the original content (text), or the inner HTML of the node.
+ #
+ def redacted_node_content(node)
+ original_content = node.attr('data-original')
+ link_reference = node.attr('data-link-reference')
+
+ # Build the raw <a> tag just with a link as href and content if
+ # it's originally a link pattern. We shouldn't return a plain text href.
+ original_link =
+ if link_reference == 'true' && href = original_content
+ %(<a href="#{href}">#{href}</a>)
+ end
+
+ # The reference should be replaced by the original link's content,
+ # which is not always the same as the rendered one.
+ original_link || original_content || node.inner_html
+ end
+
def redact_cross_project_references(documents)
extractor = Banzai::IssuableExtractor.new(project, user)
issuables = extractor.extract(documents)
diff --git a/lib/banzai/renderer/common_mark/html.rb b/lib/banzai/renderer/common_mark/html.rb
new file mode 100644
index 00000000000..c7a54629f31
--- /dev/null
+++ b/lib/banzai/renderer/common_mark/html.rb
@@ -0,0 +1,21 @@
+module Banzai
+ module Renderer
+ module CommonMark
+ class HTML < CommonMarker::HtmlRenderer
+ def code_block(node)
+ block do
+ code = node.string_content
+ lang = node.fence_info
+ lang_attr = lang.present? ? %Q{ lang="#{lang}"} : ''
+ result =
+ "<pre>" \
+ "<code#{lang_attr}>#{html_escape(code)}</code>" \
+ "</pre>"
+
+ out(result)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/banzai/renderer/html.rb b/lib/banzai/renderer/html.rb
deleted file mode 100644
index 252caa35947..00000000000
--- a/lib/banzai/renderer/html.rb
+++ /dev/null
@@ -1,13 +0,0 @@
-module Banzai
- module Renderer
- class HTML < Redcarpet::Render::HTML
- def block_code(code, lang)
- lang_attr = lang ? %Q{ lang="#{lang}"} : ''
-
- "\n<pre>" \
- "<code#{lang_attr}>#{html_escape(code)}</code>" \
- "</pre>"
- end
- end
- end
-end
diff --git a/lib/banzai/renderer/redcarpet/html.rb b/lib/banzai/renderer/redcarpet/html.rb
new file mode 100644
index 00000000000..94df5d8b1e1
--- /dev/null
+++ b/lib/banzai/renderer/redcarpet/html.rb
@@ -0,0 +1,15 @@
+module Banzai
+ module Renderer
+ module Redcarpet
+ class HTML < ::Redcarpet::Render::HTML
+ def block_code(code, lang)
+ lang_attr = lang ? %Q{ lang="#{lang}"} : ''
+
+ "\n<pre>" \
+ "<code#{lang_attr}>#{html_escape(code)}</code>" \
+ "</pre>"
+ end
+ end
+ end
+ end
+end