summaryrefslogtreecommitdiff
path: root/lib/banzai
diff options
context:
space:
mode:
Diffstat (limited to 'lib/banzai')
-rw-r--r--lib/banzai/filter/emoji_filter.rb19
-rw-r--r--lib/banzai/filter/footnote_filter.rb56
-rw-r--r--lib/banzai/filter/markdown_engines/common_mark.rb52
-rw-r--r--lib/banzai/filter/markdown_post_escape_filter.rb18
-rw-r--r--lib/banzai/filter/plantuml_filter.rb18
-rw-r--r--lib/banzai/filter/sanitization_filter.rb9
-rw-r--r--lib/banzai/filter/syntax_highlight_filter.rb43
-rw-r--r--lib/banzai/renderer.rb42
-rw-r--r--lib/banzai/renderer/common_mark/html.rb2
9 files changed, 189 insertions, 70 deletions
diff --git a/lib/banzai/filter/emoji_filter.rb b/lib/banzai/filter/emoji_filter.rb
index 9d24bf028b6..d8c9fd0a7f0 100644
--- a/lib/banzai/filter/emoji_filter.rb
+++ b/lib/banzai/filter/emoji_filter.rb
@@ -8,7 +8,6 @@ module Banzai
# Based on HTML::Pipeline::EmojiFilter
class EmojiFilter < HTML::Pipeline::Filter
IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set
- IGNORE_UNICODE_EMOJIS = %w(™ © ®).freeze
def call
doc.xpath('descendant-or-self::text()').each do |node|
@@ -35,7 +34,8 @@ module Banzai
def emoji_name_element_unicode_filter(text)
text.gsub(emoji_pattern) do |match|
name = Regexp.last_match(1)
- Gitlab::Emoji.gl_emoji_tag(name)
+ emoji = TanukiEmoji.find_by_alpha_code(name)
+ Gitlab::Emoji.gl_emoji_tag(emoji)
end
end
@@ -46,26 +46,19 @@ module Banzai
# Returns a String with unicode emoji replaced with gl-emoji unicode.
def emoji_unicode_element_unicode_filter(text)
text.gsub(emoji_unicode_pattern) do |moji|
- emoji_info = Gitlab::Emoji.emojis_by_moji[moji]
- Gitlab::Emoji.gl_emoji_tag(emoji_info['name'])
+ emoji = TanukiEmoji.find_by_codepoints(moji)
+ Gitlab::Emoji.gl_emoji_tag(emoji)
end
end
# Build a regexp that matches all valid :emoji: names.
def self.emoji_pattern
- @emoji_pattern ||=
- %r{(?<=[^[:alnum:]:]|\n|^)
- :(#{Gitlab::Emoji.emojis_names.map { |name| Regexp.escape(name) }.join('|')}):
- (?=[^[:alnum:]:]|$)}x
+ @emoji_pattern ||= TanukiEmoji.index.alpha_code_pattern
end
# Build a regexp that matches all valid unicode emojis.
def self.emoji_unicode_pattern
- @emoji_unicode_pattern ||=
- begin
- filtered_emojis = Gitlab::Emoji.emojis_unicodes - IGNORE_UNICODE_EMOJIS
- /(#{filtered_emojis.map { |moji| Regexp.escape(moji) }.join('|')})/
- end
+ @emoji_unicode_pattern ||= TanukiEmoji.index.codepoints_pattern
end
private
diff --git a/lib/banzai/filter/footnote_filter.rb b/lib/banzai/filter/footnote_filter.rb
index 0f856dc0eb9..39c42ceaf9b 100644
--- a/lib/banzai/filter/footnote_filter.rb
+++ b/lib/banzai/filter/footnote_filter.rb
@@ -16,37 +16,60 @@ module Banzai
# can be used for a single render). So you get `id=fn1-4335` and `id=fn2-4335`.
#
class FootnoteFilter < HTML::Pipeline::Filter
- INTEGER_PATTERN = /\A\d+\z/.freeze
- FOOTNOTE_ID_PREFIX = 'fn'
- FOOTNOTE_LINK_ID_PREFIX = 'fnref'
- FOOTNOTE_LI_REFERENCE_PATTERN = /\A#{FOOTNOTE_ID_PREFIX}\d+\z/.freeze
- FOOTNOTE_LINK_REFERENCE_PATTERN = /\A#{FOOTNOTE_LINK_ID_PREFIX}\d+\z/.freeze
- FOOTNOTE_START_NUMBER = 1
-
- CSS_SECTION = "ol > li[id=#{FOOTNOTE_ID_PREFIX}#{FOOTNOTE_START_NUMBER}]"
+ FOOTNOTE_ID_PREFIX = 'fn-'
+ FOOTNOTE_LINK_ID_PREFIX = 'fnref-'
+ FOOTNOTE_LI_REFERENCE_PATTERN = /\A#{FOOTNOTE_ID_PREFIX}.+\z/.freeze
+ FOOTNOTE_LINK_REFERENCE_PATTERN = /\A#{FOOTNOTE_LINK_ID_PREFIX}.+\z/.freeze
+
+ CSS_SECTION = "ol > li a[href^=\"\##{FOOTNOTE_LINK_ID_PREFIX}\"]"
XPATH_SECTION = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_SECTION).freeze
CSS_FOOTNOTE = 'sup > a[id]'
XPATH_FOOTNOTE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_FOOTNOTE).freeze
+ # only needed when feature flag use_cmark_renderer is turned off
+ INTEGER_PATTERN = /\A\d+\z/.freeze
+ FOOTNOTE_ID_PREFIX_OLD = 'fn'
+ FOOTNOTE_LINK_ID_PREFIX_OLD = 'fnref'
+ FOOTNOTE_LI_REFERENCE_PATTERN_OLD = /\A#{FOOTNOTE_ID_PREFIX_OLD}\d+\z/.freeze
+ FOOTNOTE_LINK_REFERENCE_PATTERN_OLD = /\A#{FOOTNOTE_LINK_ID_PREFIX_OLD}\d+\z/.freeze
+ FOOTNOTE_START_NUMBER = 1
+ CSS_SECTION_OLD = "ol > li[id=#{FOOTNOTE_ID_PREFIX_OLD}#{FOOTNOTE_START_NUMBER}]"
+ XPATH_SECTION_OLD = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_SECTION_OLD).freeze
+
def call
- return doc unless first_footnote = doc.at_xpath(XPATH_SECTION)
+ xpath_section = Feature.enabled?(:use_cmark_renderer) ? XPATH_SECTION : XPATH_SECTION_OLD
+ return doc unless first_footnote = doc.at_xpath(xpath_section)
# Sanitization stripped off the section wrapper - add it back in
- first_footnote.parent.wrap('<section class="footnotes">')
+ if Feature.enabled?(:use_cmark_renderer)
+ first_footnote.parent.parent.parent.wrap('<section class="footnotes" data-footnotes>')
+ else
+ first_footnote.parent.wrap('<section class="footnotes">')
+ end
+
rand_suffix = "-#{random_number}"
modified_footnotes = {}
doc.xpath(XPATH_FOOTNOTE).each do |link_node|
- ref_num = link_node[:id].delete_prefix(FOOTNOTE_LINK_ID_PREFIX)
- node_xpath = Gitlab::Utils::Nokogiri.css_to_xpath("li[id=#{fn_id(ref_num)}]")
+ if Feature.enabled?(:use_cmark_renderer)
+ ref_num = link_node[:id].delete_prefix(FOOTNOTE_LINK_ID_PREFIX)
+ ref_num.gsub!(/[[:punct:]]/, '\\\\\&')
+ else
+ ref_num = link_node[:id].delete_prefix(FOOTNOTE_LINK_ID_PREFIX_OLD)
+ end
+
+ node_xpath = Gitlab::Utils::Nokogiri.css_to_xpath("li[id=#{fn_id(ref_num)}]")
footnote_node = doc.at_xpath(node_xpath)
- if INTEGER_PATTERN.match?(ref_num) && (footnote_node || modified_footnotes[ref_num])
+ if footnote_node || modified_footnotes[ref_num]
+ next if Feature.disabled?(:use_cmark_renderer) && !INTEGER_PATTERN.match?(ref_num)
+
link_node[:href] += rand_suffix
link_node[:id] += rand_suffix
# Sanitization stripped off class - add it back in
link_node.parent.append_class('footnote-ref')
+ link_node['data-footnote-ref'] = nil if Feature.enabled?(:use_cmark_renderer)
unless modified_footnotes[ref_num]
footnote_node[:id] += rand_suffix
@@ -55,6 +78,7 @@ module Banzai
if backref_node
backref_node[:href] += rand_suffix
backref_node.append_class('footnote-backref')
+ backref_node['data-footnote-backref'] = nil if Feature.enabled?(:use_cmark_renderer)
end
modified_footnotes[ref_num] = true
@@ -72,11 +96,13 @@ module Banzai
end
def fn_id(num)
- "#{FOOTNOTE_ID_PREFIX}#{num}"
+ prefix = Feature.enabled?(:use_cmark_renderer) ? FOOTNOTE_ID_PREFIX : FOOTNOTE_ID_PREFIX_OLD
+ "#{prefix}#{num}"
end
def fnref_id(num)
- "#{FOOTNOTE_LINK_ID_PREFIX}#{num}"
+ prefix = Feature.enabled?(:use_cmark_renderer) ? FOOTNOTE_LINK_ID_PREFIX : FOOTNOTE_LINK_ID_PREFIX_OLD
+ "#{prefix}#{num}"
end
end
end
diff --git a/lib/banzai/filter/markdown_engines/common_mark.rb b/lib/banzai/filter/markdown_engines/common_mark.rb
index 7be52fc497f..a25ebedf029 100644
--- a/lib/banzai/filter/markdown_engines/common_mark.rb
+++ b/lib/banzai/filter/markdown_engines/common_mark.rb
@@ -13,8 +13,7 @@ module Banzai
EXTENSIONS = [
:autolink, # provides support for automatically converting URLs to anchor tags.
:strikethrough, # provides support for strikethroughs.
- :table, # provides support for tables.
- :tagfilter # strips out several "unsafe" HTML tags from being used: https://github.github.com/gfm/#disallowed-raw-html-extension-
+ :table # provides support for tables.
].freeze
PARSE_OPTIONS = [
@@ -23,36 +22,63 @@ module Banzai
:VALIDATE_UTF8 # replace illegal sequences with the replacement character U+FFFD.
].freeze
+ RENDER_OPTIONS_C = [
+ :GITHUB_PRE_LANG, # use GitHub-style <pre lang> for fenced code blocks.
+ :FOOTNOTES, # render footnotes.
+ :FULL_INFO_STRING, # include full info strings of code blocks in separate attribute.
+ :UNSAFE # allow raw/custom HTML and unsafe links.
+ ].freeze
+
# The `:GITHUB_PRE_LANG` option is intentionally not used for the Ruby renderer because
# it renders a fence block with language as `<pre lang="LANG"><code>some code\n</code></pre>`
# while GitLab's syntax is `<pre><code lang="LANG">some code\n</code></pre>`.
# If the syntax is made GitHub-compatible in the future, please add the `:GITHUB_PRE_LANG` render option below
# and remove the `code_block` method from `lib/banzai/renderer/common_mark/html.rb`.
- RENDER_OPTIONS = [
+ RENDER_OPTIONS_RUBY = [
# as of commonmarker 0.18.0, we need to use :UNSAFE to get the same as the original :DEFAULT
# https://github.com/gjtorikian/commonmarker/pull/81
- :UNSAFE
- ].freeze
-
- RENDER_OPTIONS_SOURCEPOS = RENDER_OPTIONS + [
- :SOURCEPOS # enable embedding of source position information
+ :UNSAFE # allow raw/custom HTML and unsafe links.
].freeze
def initialize(context)
- @context = context
- @renderer = Banzai::Renderer::CommonMark::HTML.new(options: render_options)
+ @context = context
+ @renderer = Banzai::Renderer::CommonMark::HTML.new(options: render_options) if Feature.disabled?(:use_cmark_renderer)
end
def render(text)
- doc = CommonMarker.render_doc(text, PARSE_OPTIONS, EXTENSIONS)
+ if Feature.enabled?(:use_cmark_renderer)
+ CommonMarker.render_html(text, render_options, extensions)
+ else
+ doc = CommonMarker.render_doc(text, PARSE_OPTIONS, extensions)
- @renderer.render(doc)
+ @renderer.render(doc)
+ end
end
private
+ # Extension list handed to CommonMarker by `render`: the base EXTENSIONS when the
+ # `use_cmark_renderer` feature flag is enabled, otherwise EXTENSIONS plus `:tagfilter`.
+ def extensions
+ return EXTENSIONS if Feature.enabled?(:use_cmark_renderer)
+
+ # :tagfilter strips out several "unsafe" HTML tags from being used: https://github.github.com/gfm/#disallowed-raw-html-extension-
+ legacy_only = [:tagfilter].freeze
+ EXTENSIONS + legacy_only
+ end
+
def render_options
- @context[:no_sourcepos] ? RENDER_OPTIONS : RENDER_OPTIONS_SOURCEPOS
+ @context[:no_sourcepos] ? render_options_no_sourcepos : render_options_sourcepos
+ end
+
+ def render_options_no_sourcepos
+ Feature.enabled?(:use_cmark_renderer) ? RENDER_OPTIONS_C : RENDER_OPTIONS_RUBY
+ end
+
+ def render_options_sourcepos
+ render_options_no_sourcepos + [
+ :SOURCEPOS # enable embedding of source position information
+ ].freeze
end
end
end
diff --git a/lib/banzai/filter/markdown_post_escape_filter.rb b/lib/banzai/filter/markdown_post_escape_filter.rb
index b69afdcfebe..ccffe1bfbb1 100644
--- a/lib/banzai/filter/markdown_post_escape_filter.rb
+++ b/lib/banzai/filter/markdown_post_escape_filter.rb
@@ -8,10 +8,8 @@ module Banzai
NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze
SPAN_REGEX = %r{<span>(.*?)</span>}.freeze
- CSS_A = 'a'
- XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze
- CSS_CODE = 'code'
- XPATH_CODE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_CODE).freeze
+ CSS_A = 'a'
+ XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze
def call
return doc unless result[:escaped_literals]
@@ -34,12 +32,22 @@ module Banzai
node.attributes['title'].value = node.attributes['title'].value.gsub(SPAN_REGEX, '\1') if node.attributes['title']
end
- doc.xpath(XPATH_CODE).each do |node|
+ doc.xpath(lang_tag).each do |node|
node.attributes['lang'].value = node.attributes['lang'].value.gsub(SPAN_REGEX, '\1') if node.attributes['lang']
end
doc
end
+
+ private
+
+ # XPath for the elements whose `lang` attribute `call` strips `<span>` wrappers from:
+ # `pre` when the `use_cmark_renderer` feature flag is enabled, `code` otherwise.
+ def lang_tag
+ selector = Feature.enabled?(:use_cmark_renderer) ? 'pre' : 'code'
+ Gitlab::Utils::Nokogiri.css_to_xpath(selector)
+ end
end
end
end
diff --git a/lib/banzai/filter/plantuml_filter.rb b/lib/banzai/filter/plantuml_filter.rb
index 93370178a61..e67cdc7df12 100644
--- a/lib/banzai/filter/plantuml_filter.rb
+++ b/lib/banzai/filter/plantuml_filter.rb
@@ -5,18 +5,15 @@ require "asciidoctor_plantuml/plantuml"
module Banzai
module Filter
- # HTML that replaces all `code plantuml` tags with PlantUML img tags.
+ # HTML filter that replaces all code blocks marked `lang="plantuml"` with PlantUML img tags.
#
class PlantumlFilter < HTML::Pipeline::Filter
- CSS = 'pre > code[lang="plantuml"]'
- XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
-
def call
- return doc unless settings.plantuml_enabled? && doc.at_xpath(XPATH)
+ return doc unless settings.plantuml_enabled? && doc.at_xpath(lang_tag)
plantuml_setup
- doc.xpath(XPATH).each do |node|
+ doc.xpath(lang_tag).each do |node|
img_tag = Nokogiri::HTML::DocumentFragment.parse(
Asciidoctor::PlantUml::Processor.plantuml_content(node.content, {}))
node.parent.replace(img_tag)
@@ -27,6 +24,15 @@ module Banzai
private
+ # Memoized XPath matching PlantUML code nodes. The `lang="plantuml"` attribute is expected
+ # on `pre` when the `use_cmark_renderer` feature flag is enabled and on `code` otherwise.
+ def lang_tag
+ @lang_tag ||= begin
+ selector =
+ if Feature.enabled?(:use_cmark_renderer)
+ 'pre[lang="plantuml"] > code'
+ else
+ 'pre > code[lang="plantuml"]'
+ end
+ Gitlab::Utils::Nokogiri.css_to_xpath(selector).freeze
+ end
+ end
+
def settings
Gitlab::CurrentSettings.current_application_settings
end
diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb
index 1e84e7e8af3..7afbc1a1c9c 100644
--- a/lib/banzai/filter/sanitization_filter.rb
+++ b/lib/banzai/filter/sanitization_filter.rb
@@ -54,8 +54,13 @@ module Banzai
return unless node.name == 'a' || node.name == 'li'
return unless node.has_attribute?('id')
- return if node.name == 'a' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LINK_REFERENCE_PATTERN
- return if node.name == 'li' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LI_REFERENCE_PATTERN
+ if Feature.enabled?(:use_cmark_renderer)
+ return if node.name == 'a' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LINK_REFERENCE_PATTERN
+ return if node.name == 'li' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LI_REFERENCE_PATTERN
+ else
+ return if node.name == 'a' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LINK_REFERENCE_PATTERN_OLD
+ return if node.name == 'li' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LI_REFERENCE_PATTERN_OLD
+ end
node.remove_attribute('id')
end
diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb
index 8d869cd63d3..66bd86c5bb4 100644
--- a/lib/banzai/filter/syntax_highlight_filter.rb
+++ b/lib/banzai/filter/syntax_highlight_filter.rb
@@ -11,7 +11,7 @@ module Banzai
class SyntaxHighlightFilter < HTML::Pipeline::Filter
include OutputSafety
- PARAMS_DELIMITER = ':'
+ LANG_PARAMS_DELIMITER = ':'
LANG_PARAMS_ATTR = 'data-lang-params'
CSS = 'pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code'
@@ -27,7 +27,7 @@ module Banzai
def highlight_node(node)
css_classes = +'code highlight js-syntax-highlight'
- lang, lang_params = parse_lang_params(node.attr('lang'))
+ lang, lang_params = parse_lang_params(node)
sourcepos = node.parent.attr('data-sourcepos')
retried = false
@@ -56,7 +56,7 @@ module Banzai
retry
end
- sourcepos_attr = sourcepos ? "data-sourcepos=\"#{sourcepos}\"" : ""
+ sourcepos_attr = sourcepos ? "data-sourcepos=\"#{sourcepos}\"" : ''
highlighted = %(<pre #{sourcepos_attr} class="#{css_classes}"
lang="#{language}"
@@ -69,13 +69,36 @@ module Banzai
private
- def parse_lang_params(language)
+ def parse_lang_params(node)
+ node = node.parent if Feature.enabled?(:use_cmark_renderer)
+
+ # Commonmarker's FULL_INFO_STRING render option works with the space delimiter.
+ # But the current behavior of GitLab's markdown renderer is different - it grabs everything as the single
+ # line, including language and its options. To keep backward compatibility, we have to parse the old format and
+ # merge with the new one.
+ #
+ # Behaviors before separating language and its parameters:
+ # Old ones:
+ # "```ruby with options```" -> '<pre><code lang="ruby with options">'.
+ # "```ruby:with:options```" -> '<pre><code lang="ruby:with:options">'.
+ #
+ # New ones:
+ # "```ruby with options```" -> '<pre><code lang="ruby" data-meta="with options">'.
+ # "```ruby:with:options```" -> '<pre><code lang="ruby:with:options">'.
+
+ language = node.attr('lang')
+
return unless language
- lang, params = language.split(PARAMS_DELIMITER, 2)
- formatted_params = %(#{LANG_PARAMS_ATTR}="#{escape_once(params)}") if params
+ language, language_params = language.split(LANG_PARAMS_DELIMITER, 2)
+
+ if Feature.enabled?(:use_cmark_renderer)
+ language_params = [node.attr('data-meta'), language_params].compact.join(' ')
+ end
+
+ formatted_language_params = format_language_params(language_params)
- [lang, formatted_params]
+ [language, formatted_language_params]
end
# Separate method so it can be instrumented.
@@ -95,6 +118,12 @@ module Banzai
def use_rouge?(language)
(%w(math suggestion) + ::AsciidoctorExtensions::Kroki::SUPPORTED_DIAGRAM_NAMES).exclude?(language)
end
+
+ # Builds the `data-lang-params="..."` attribute string from the given params (escaped
+ # with `escape_once`); evaluates to nil when the params are blank.
+ def format_language_params(language_params)
+ %(#{LANG_PARAMS_ATTR}="#{escape_once(language_params)}") unless language_params.blank?
+ end
end
end
end
diff --git a/lib/banzai/renderer.rb b/lib/banzai/renderer.rb
index fbbd6135959..b16af78841a 100644
--- a/lib/banzai/renderer.rb
+++ b/lib/banzai/renderer.rb
@@ -160,16 +160,40 @@ module Banzai
def self.cacheless_render(text, context = {})
return text.to_s unless text.present?
- Gitlab::Metrics.measure(:banzai_cacheless_render) do
- result = render_result(text, context)
+ real_start = Gitlab::Metrics::System.monotonic_time
+ cpu_start = Gitlab::Metrics::System.cpu_time
- output = result[:output]
- if output.respond_to?(:to_html)
- output.to_html
- else
- output.to_s
- end
- end
+ result = render_result(text, context)
+
+ output = result[:output]
+ rendered = if output.respond_to?(:to_html)
+ output.to_html
+ else
+ output.to_s
+ end
+
+ cpu_duration_histogram.observe({}, Gitlab::Metrics::System.cpu_time - cpu_start)
+ real_duration_histogram.observe({}, Gitlab::Metrics::System.monotonic_time - real_start)
+
+ rendered
+ end
+
+ # Histogram that `cacheless_render` feeds with the elapsed real (monotonic) time of a render.
+ def self.real_duration_histogram
+ Gitlab::Metrics.histogram(
+ :gitlab_banzai_cacheless_render_real_duration_seconds,
+ 'Duration of Banzai pipeline rendering in real time',
+ {},
+ # Each bucket boundary is listed exactly once, in ascending order.
+ [0.01, 0.05, 0.1, 0.5, 1, 2, 5, 10.0, 50, 100]
+ )
+ end
+
+ # Histogram that `cacheless_render` feeds with the CPU time spent on a render.
+ def self.cpu_duration_histogram
+ metric_name = :gitlab_banzai_cacheless_render_cpu_duration_seconds
+ description = 'Duration of Banzai pipeline rendering in cpu time'
+
+ Gitlab::Metrics.histogram(metric_name, description, {}, Gitlab::Metrics::EXECUTION_MEASUREMENT_BUCKETS)
end
def self.full_cache_key(cache_key, pipeline_name)
diff --git a/lib/banzai/renderer/common_mark/html.rb b/lib/banzai/renderer/common_mark/html.rb
index 837665451a1..d9a2d9a9564 100644
--- a/lib/banzai/renderer/common_mark/html.rb
+++ b/lib/banzai/renderer/common_mark/html.rb
@@ -1,5 +1,7 @@
# frozen_string_literal: true
+# Remove this entire file when removing `use_cmark_renderer` feature flag and switching to the CMARK html renderer.
+# https://gitlab.com/gitlab-org/gitlab/-/issues/345744
module Banzai
module Renderer
module CommonMark