diff options
author | Marcia Ramos <virtua.creative@gmail.com> | 2018-03-09 12:36:26 -0300 |
---|---|---|
committer | Marcia Ramos <virtua.creative@gmail.com> | 2018-03-09 12:36:26 -0300 |
commit | 5596933b535d632cf3c8159889a72b1e98e4ec0a (patch) | |
tree | 5edc39c0408a1e5bcbc13168dedbdabd1eba417f /lib/banzai | |
parent | da5694c5cbaf62d5568339efd1a6f340f97e6e53 (diff) | |
parent | 3bbe60f8e802ce3d9da060a47b7f635dedba7370 (diff) | |
download | gitlab-ce-docs-refactor-dev-guides.tar.gz |
fix conflict (docs-refactor-dev-guides)
Diffstat (limited to 'lib/banzai')
-rw-r--r-- | lib/banzai/filter/abstract_reference_filter.rb | 15 | ||||
-rw-r--r-- | lib/banzai/filter/autolink_filter.rb | 86 | ||||
-rw-r--r-- | lib/banzai/filter/commit_reference_filter.rb | 3 | ||||
-rw-r--r-- | lib/banzai/filter/markdown_engines/common_mark.rb | 45 | ||||
-rw-r--r-- | lib/banzai/filter/markdown_engines/redcarpet.rb | 32 | ||||
-rw-r--r-- | lib/banzai/filter/markdown_filter.rb | 41 | ||||
-rw-r--r-- | lib/banzai/filter/syntax_highlight_filter.rb | 1 | ||||
-rw-r--r-- | lib/banzai/redactor.rb | 25 | ||||
-rw-r--r-- | lib/banzai/renderer/common_mark/html.rb | 21 | ||||
-rw-r--r-- | lib/banzai/renderer/html.rb | 13 | ||||
-rw-r--r-- | lib/banzai/renderer/redcarpet/html.rb | 15 |
11 files changed, 205 insertions, 92 deletions
diff --git a/lib/banzai/filter/abstract_reference_filter.rb b/lib/banzai/filter/abstract_reference_filter.rb index e7e6a90b5fd..c9e3f8ce42b 100644 --- a/lib/banzai/filter/abstract_reference_filter.rb +++ b/lib/banzai/filter/abstract_reference_filter.rb @@ -174,7 +174,9 @@ module Banzai title = object_link_title(object) klass = reference_class(object_sym) - data = data_attributes_for(link_content || match, parent, object, link: !!link_content) + data = data_attributes_for(link_content || match, parent, object, + link_content: !!link_content, + link_reference: link_reference) url = if matches.names.include?("url") && matches[:url] @@ -194,12 +196,13 @@ module Banzai end end - def data_attributes_for(text, project, object, link: false) + def data_attributes_for(text, project, object, link_content: false, link_reference: false) data_attribute( - original: text, - link: link, - project: project.id, - object_sym => object.id + original: text, + link: link_content, + link_reference: link_reference, + project: project.id, + object_sym => object.id ) end diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index b8d2673c1a6..75b64ae9af2 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -25,8 +25,8 @@ module Banzai # period or comma for punctuation without those characters being included # in the generated link. 
# - # Rubular: http://rubular.com/r/cxjPyZc7Sb - LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)} + # Rubular: http://rubular.com/r/JzPhi6DCZp + LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)} # Text matching LINK_PATTERN inside these elements will not be linked IGNORE_PARENTS = %w(a code kbd pre script style).to_set @@ -35,53 +35,19 @@ module Banzai TEXT_QUERY = %Q(descendant-or-self::text()[ not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')}) and contains(., '://') - and not(starts-with(., 'http')) - and not(starts-with(., 'ftp')) ]).freeze + PUNCTUATION_PAIRS = { + "'" => "'", + '"' => '"', + ')' => '(', + ']' => '[', + '}' => '{' + }.freeze + def call return doc if context[:autolink] == false - rinku_parse - text_parse - end - - private - - # Run the text through Rinku as a first pass - # - # This will quickly autolink http(s) and ftp links. - # - # `@doc` will be re-parsed with the HTML String from Rinku. - def rinku_parse - # Convert the options from a Hash to a String that Rinku expects - options = tag_options(link_options) - - # NOTE: We don't parse email links because it will erroneously match - # external Commit and CommitRange references. - # - # The final argument tells Rinku to link short URLs that don't include a - # period (e.g., http://localhost:3000/) - rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1) - - return if rinku == html - - # Rinku returns a String, so parse it back to a Nokogiri::XML::Document - # for further processing. - @doc = parse_html(rinku) - end - - # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme - def contains_unsafe?(scheme) - return false unless scheme - - scheme = scheme.strip.downcase - Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? 
{ |protocol| scheme.include?(protocol) } - end - - # Autolinks any text matching LINK_PATTERN that Rinku didn't already - # replace - def text_parse doc.xpath(TEXT_QUERY).each do |node| content = node.to_html @@ -97,6 +63,16 @@ module Banzai doc end + private + + # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme + def contains_unsafe?(scheme) + return false unless scheme + + scheme = scheme.strip.downcase + Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) } + end + def autolink_match(match) # start by stripping out dangerous links begin @@ -112,12 +88,30 @@ module Banzai match.gsub!(/((?:&[\w#]+;)+)\z/, '') dropped = ($1 || '').html_safe + # To match the behaviour of Rinku, if the matched link ends with a + # closing part of a matched pair of punctuation, we remove that trailing + # character unless there are an equal number of closing and opening + # characters in the link. + if match.end_with?(*PUNCTUATION_PAIRS.keys) + close_character = match[-1] + close_count = match.count(close_character) + open_character = PUNCTUATION_PAIRS[close_character] + open_count = match.count(open_character) + + if open_count != close_count || open_character == close_character + dropped += close_character + match = match[0..-2] + end + end + options = link_options.merge(href: match) - content_tag(:a, match, options) + dropped + content_tag(:a, match.html_safe, options) + dropped end def autolink_filter(text) - text.gsub(LINK_PATTERN) { |match| autolink_match(match) } + Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:| + autolink_match(link) + end end def link_options diff --git a/lib/banzai/filter/commit_reference_filter.rb b/lib/banzai/filter/commit_reference_filter.rb index eedb95197aa..43bf4fc6565 100644 --- a/lib/banzai/filter/commit_reference_filter.rb +++ b/lib/banzai/filter/commit_reference_filter.rb @@ -18,7 +18,8 @@ module Banzai 
def find_object(project, id) if project && project.valid_repo? - project.commit(id) + # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/43894 + Gitlab::GitalyClient.allow_n_plus_1_calls { project.commit(id) } end end diff --git a/lib/banzai/filter/markdown_engines/common_mark.rb b/lib/banzai/filter/markdown_engines/common_mark.rb new file mode 100644 index 00000000000..bc9597df894 --- /dev/null +++ b/lib/banzai/filter/markdown_engines/common_mark.rb @@ -0,0 +1,45 @@ +# `CommonMark` markdown engine for GitLab's Banzai markdown filter. +# This module is used in Banzai::Filter::MarkdownFilter. +# Used gem is `commonmarker` which is a ruby wrapper for libcmark (CommonMark parser) +# including GitHub's GFM extensions. +# Homepage: https://github.com/gjtorikian/commonmarker + +module Banzai + module Filter + module MarkdownEngines + class CommonMark + EXTENSIONS = [ + :autolink, # provides support for automatically converting URLs to anchor tags. + :strikethrough, # provides support for strikethroughs. + :table, # provides support for tables. + :tagfilter # strips out several "unsafe" HTML tags from being used: https://github.github.com/gfm/#disallowed-raw-html-extension- + ].freeze + + PARSE_OPTIONS = [ + :FOOTNOTES, # parse footnotes. + :STRIKETHROUGH_DOUBLE_TILDE, # parse strikethroughs by double tildes (as redcarpet does). + :VALIDATE_UTF8 # replace illegal sequences with the replacement character U+FFFD. + ].freeze + + # The `:GITHUB_PRE_LANG` option is not used intentionally because + # it renders a fence block with language as `<pre lang="LANG"><code>some code\n</code></pre>` + # while GitLab's syntax is `<pre><code lang="LANG">some code\n</code></pre>`. + # If in the future the syntax is about to be made GitHub-compatible, please, add `:GITHUB_PRE_LANG` render option below + # and remove `code_block` method from `lib/banzai/renderer/common_mark/html.rb`. + RENDER_OPTIONS = [ + :DEFAULT # default rendering system. Nothing special. 
+ ].freeze + + def initialize + @renderer = Banzai::Renderer::CommonMark::HTML.new(options: RENDER_OPTIONS) + end + + def render(text) + doc = CommonMarker.render_doc(text, PARSE_OPTIONS, EXTENSIONS) + + @renderer.render(doc) + end + end + end + end +end diff --git a/lib/banzai/filter/markdown_engines/redcarpet.rb b/lib/banzai/filter/markdown_engines/redcarpet.rb new file mode 100644 index 00000000000..ac99941fefa --- /dev/null +++ b/lib/banzai/filter/markdown_engines/redcarpet.rb @@ -0,0 +1,32 @@ +# `Redcarpet` markdown engine for GitLab's Banzai markdown filter. +# This module is used in Banzai::Filter::MarkdownFilter. +# Used gem is `redcarpet` which is a ruby library for markdown processing. +# Homepage: https://github.com/vmg/redcarpet + +module Banzai + module Filter + module MarkdownEngines + class Redcarpet + OPTIONS = { + fenced_code_blocks: true, + footnotes: true, + lax_spacing: true, + no_intra_emphasis: true, + space_after_headers: true, + strikethrough: true, + superscript: true, + tables: true + }.freeze + + def initialize + html_renderer = Banzai::Renderer::Redcarpet::HTML.new + @renderer = ::Redcarpet::Markdown.new(html_renderer, OPTIONS) + end + + def render(text) + @renderer.render(text) + end + end + end + end +end diff --git a/lib/banzai/filter/markdown_filter.rb b/lib/banzai/filter/markdown_filter.rb index 9cac303e645..c1e2b680240 100644 --- a/lib/banzai/filter/markdown_filter.rb +++ b/lib/banzai/filter/markdown_filter.rb @@ -1,34 +1,31 @@ module Banzai module Filter class MarkdownFilter < HTML::Pipeline::TextFilter - # https://github.com/vmg/redcarpet#and-its-like-really-simple-to-use - REDCARPET_OPTIONS = { - fenced_code_blocks: true, - footnotes: true, - lax_spacing: true, - no_intra_emphasis: true, - space_after_headers: true, - strikethrough: true, - superscript: true, - tables: true - }.freeze - def initialize(text, context = nil, result = nil) - super text, context, result - @text = @text.delete "\r" + super(text, context, result) + + 
@renderer = renderer(context[:markdown_engine]).new + @text = @text.delete("\r") end def call - html = self.class.renderer.render(@text) - html.rstrip! - html + @renderer.render(@text).rstrip + end + + private + + DEFAULT_ENGINE = :redcarpet + + def engine(engine_from_context) + engine_from_context ||= DEFAULT_ENGINE + + engine_from_context.to_s.classify end - def self.renderer - Thread.current[:banzai_markdown_renderer] ||= begin - renderer = Banzai::Renderer::HTML.new - Redcarpet::Markdown.new(renderer, REDCARPET_OPTIONS) - end + def renderer(engine_from_context) + "Banzai::Filter::MarkdownEngines::#{engine(engine_from_context)}".constantize + rescue NameError + raise NameError, "`#{engine_from_context}` is unknown markdown engine" end end end diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb index 0ac7e231b5b..6dbf0d68fe8 100644 --- a/lib/banzai/filter/syntax_highlight_filter.rb +++ b/lib/banzai/filter/syntax_highlight_filter.rb @@ -1,3 +1,4 @@ +require 'rouge/plugins/common_mark' require 'rouge/plugins/redcarpet' module Banzai diff --git a/lib/banzai/redactor.rb b/lib/banzai/redactor.rb index 827df7c08ae..fd457bebf03 100644 --- a/lib/banzai/redactor.rb +++ b/lib/banzai/redactor.rb @@ -42,16 +42,33 @@ module Banzai next if visible.include?(node) doc_data[:visible_reference_count] -= 1 - # The reference should be replaced by the original link's content, - # which is not always the same as the rendered one. - content = node.attr('data-original') || node.inner_html - node.replace(content) + redacted_content = redacted_node_content(node) + node.replace(redacted_content) end end metadata end + # Return redacted content of given node as either the original link (<a> tag), + # the original content (text), or the inner HTML of the node. 
+ # + def redacted_node_content(node) + original_content = node.attr('data-original') + link_reference = node.attr('data-link-reference') + + # Build the raw <a> tag just with a link as href and content if + # it's originally a link pattern. We shouldn't return a plain text href. + original_link = + if link_reference == 'true' && href = original_content + %(<a href="#{href}">#{href}</a>) + end + + # The reference should be replaced by the original link's content, + # which is not always the same as the rendered one. + original_link || original_content || node.inner_html + end + def redact_cross_project_references(documents) extractor = Banzai::IssuableExtractor.new(project, user) issuables = extractor.extract(documents) diff --git a/lib/banzai/renderer/common_mark/html.rb b/lib/banzai/renderer/common_mark/html.rb new file mode 100644 index 00000000000..c7a54629f31 --- /dev/null +++ b/lib/banzai/renderer/common_mark/html.rb @@ -0,0 +1,21 @@ +module Banzai + module Renderer + module CommonMark + class HTML < CommonMarker::HtmlRenderer + def code_block(node) + block do + code = node.string_content + lang = node.fence_info + lang_attr = lang.present? ? %Q{ lang="#{lang}"} : '' + result = + "<pre>" \ + "<code#{lang_attr}>#{html_escape(code)}</code>" \ + "</pre>" + + out(result) + end + end + end + end + end +end diff --git a/lib/banzai/renderer/html.rb b/lib/banzai/renderer/html.rb deleted file mode 100644 index 252caa35947..00000000000 --- a/lib/banzai/renderer/html.rb +++ /dev/null @@ -1,13 +0,0 @@ -module Banzai - module Renderer - class HTML < Redcarpet::Render::HTML - def block_code(code, lang) - lang_attr = lang ? 
%Q{ lang="#{lang}"} : '' - - "\n<pre>" \ - "<code#{lang_attr}>#{html_escape(code)}</code>" \ - "</pre>" - end - end - end -end diff --git a/lib/banzai/renderer/redcarpet/html.rb b/lib/banzai/renderer/redcarpet/html.rb new file mode 100644 index 00000000000..94df5d8b1e1 --- /dev/null +++ b/lib/banzai/renderer/redcarpet/html.rb @@ -0,0 +1,15 @@ +module Banzai + module Renderer + module Redcarpet + class HTML < ::Redcarpet::Render::HTML + def block_code(code, lang) + lang_attr = lang ? %Q{ lang="#{lang}"} : '' + + "\n<pre>" \ + "<code#{lang_attr}>#{html_escape(code)}</code>" \ + "</pre>" + end + end + end + end +end |