diff options
Diffstat (limited to 'lib/banzai')
-rw-r--r-- | lib/banzai/filter/emoji_filter.rb | 19 | ||||
-rw-r--r-- | lib/banzai/filter/footnote_filter.rb | 56 | ||||
-rw-r--r-- | lib/banzai/filter/markdown_engines/common_mark.rb | 52 | ||||
-rw-r--r-- | lib/banzai/filter/markdown_post_escape_filter.rb | 18 | ||||
-rw-r--r-- | lib/banzai/filter/plantuml_filter.rb | 18 | ||||
-rw-r--r-- | lib/banzai/filter/sanitization_filter.rb | 9 | ||||
-rw-r--r-- | lib/banzai/filter/syntax_highlight_filter.rb | 43 | ||||
-rw-r--r-- | lib/banzai/renderer.rb | 42 | ||||
-rw-r--r-- | lib/banzai/renderer/common_mark/html.rb | 2 |
9 files changed, 189 insertions, 70 deletions
diff --git a/lib/banzai/filter/emoji_filter.rb b/lib/banzai/filter/emoji_filter.rb index 9d24bf028b6..d8c9fd0a7f0 100644 --- a/lib/banzai/filter/emoji_filter.rb +++ b/lib/banzai/filter/emoji_filter.rb @@ -8,7 +8,6 @@ module Banzai # Based on HTML::Pipeline::EmojiFilter class EmojiFilter < HTML::Pipeline::Filter IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set - IGNORE_UNICODE_EMOJIS = %w(™ © ®).freeze def call doc.xpath('descendant-or-self::text()').each do |node| @@ -35,7 +34,8 @@ module Banzai def emoji_name_element_unicode_filter(text) text.gsub(emoji_pattern) do |match| name = Regexp.last_match(1) - Gitlab::Emoji.gl_emoji_tag(name) + emoji = TanukiEmoji.find_by_alpha_code(name) + Gitlab::Emoji.gl_emoji_tag(emoji) end end @@ -46,26 +46,19 @@ module Banzai # Returns a String with unicode emoji replaced with gl-emoji unicode. def emoji_unicode_element_unicode_filter(text) text.gsub(emoji_unicode_pattern) do |moji| - emoji_info = Gitlab::Emoji.emojis_by_moji[moji] - Gitlab::Emoji.gl_emoji_tag(emoji_info['name']) + emoji = TanukiEmoji.find_by_codepoints(moji) + Gitlab::Emoji.gl_emoji_tag(emoji) end end # Build a regexp that matches all valid :emoji: names. def self.emoji_pattern - @emoji_pattern ||= - %r{(?<=[^[:alnum:]:]|\n|^) - :(#{Gitlab::Emoji.emojis_names.map { |name| Regexp.escape(name) }.join('|')}): - (?=[^[:alnum:]:]|$)}x + @emoji_pattern ||= TanukiEmoji.index.alpha_code_pattern end # Build a regexp that matches all valid unicode emojis names. 
def self.emoji_unicode_pattern - @emoji_unicode_pattern ||= - begin - filtered_emojis = Gitlab::Emoji.emojis_unicodes - IGNORE_UNICODE_EMOJIS - /(#{filtered_emojis.map { |moji| Regexp.escape(moji) }.join('|')})/ - end + @emoji_unicode_pattern ||= TanukiEmoji.index.codepoints_pattern end private diff --git a/lib/banzai/filter/footnote_filter.rb b/lib/banzai/filter/footnote_filter.rb index 0f856dc0eb9..39c42ceaf9b 100644 --- a/lib/banzai/filter/footnote_filter.rb +++ b/lib/banzai/filter/footnote_filter.rb @@ -16,37 +16,60 @@ module Banzai # can be used for a single render). So you get `id=fn1-4335` and `id=fn2-4335`. # class FootnoteFilter < HTML::Pipeline::Filter - INTEGER_PATTERN = /\A\d+\z/.freeze - FOOTNOTE_ID_PREFIX = 'fn' - FOOTNOTE_LINK_ID_PREFIX = 'fnref' - FOOTNOTE_LI_REFERENCE_PATTERN = /\A#{FOOTNOTE_ID_PREFIX}\d+\z/.freeze - FOOTNOTE_LINK_REFERENCE_PATTERN = /\A#{FOOTNOTE_LINK_ID_PREFIX}\d+\z/.freeze - FOOTNOTE_START_NUMBER = 1 - - CSS_SECTION = "ol > li[id=#{FOOTNOTE_ID_PREFIX}#{FOOTNOTE_START_NUMBER}]" + FOOTNOTE_ID_PREFIX = 'fn-' + FOOTNOTE_LINK_ID_PREFIX = 'fnref-' + FOOTNOTE_LI_REFERENCE_PATTERN = /\A#{FOOTNOTE_ID_PREFIX}.+\z/.freeze + FOOTNOTE_LINK_REFERENCE_PATTERN = /\A#{FOOTNOTE_LINK_ID_PREFIX}.+\z/.freeze + + CSS_SECTION = "ol > li a[href^=\"\##{FOOTNOTE_LINK_ID_PREFIX}\"]" XPATH_SECTION = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_SECTION).freeze CSS_FOOTNOTE = 'sup > a[id]' XPATH_FOOTNOTE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_FOOTNOTE).freeze + # only needed when feature flag use_cmark_renderer is turned off + INTEGER_PATTERN = /\A\d+\z/.freeze + FOOTNOTE_ID_PREFIX_OLD = 'fn' + FOOTNOTE_LINK_ID_PREFIX_OLD = 'fnref' + FOOTNOTE_LI_REFERENCE_PATTERN_OLD = /\A#{FOOTNOTE_ID_PREFIX_OLD}\d+\z/.freeze + FOOTNOTE_LINK_REFERENCE_PATTERN_OLD = /\A#{FOOTNOTE_LINK_ID_PREFIX_OLD}\d+\z/.freeze + FOOTNOTE_START_NUMBER = 1 + CSS_SECTION_OLD = "ol > li[id=#{FOOTNOTE_ID_PREFIX_OLD}#{FOOTNOTE_START_NUMBER}]" + XPATH_SECTION_OLD = 
Gitlab::Utils::Nokogiri.css_to_xpath(CSS_SECTION_OLD).freeze + def call - return doc unless first_footnote = doc.at_xpath(XPATH_SECTION) + xpath_section = Feature.enabled?(:use_cmark_renderer) ? XPATH_SECTION : XPATH_SECTION_OLD + return doc unless first_footnote = doc.at_xpath(xpath_section) # Sanitization stripped off the section wrapper - add it back in - first_footnote.parent.wrap('<section class="footnotes">') + if Feature.enabled?(:use_cmark_renderer) + first_footnote.parent.parent.parent.wrap('<section class="footnotes" data-footnotes>') + else + first_footnote.parent.wrap('<section class="footnotes">') + end + rand_suffix = "-#{random_number}" modified_footnotes = {} doc.xpath(XPATH_FOOTNOTE).each do |link_node| - ref_num = link_node[:id].delete_prefix(FOOTNOTE_LINK_ID_PREFIX) - node_xpath = Gitlab::Utils::Nokogiri.css_to_xpath("li[id=#{fn_id(ref_num)}]") + if Feature.enabled?(:use_cmark_renderer) + ref_num = link_node[:id].delete_prefix(FOOTNOTE_LINK_ID_PREFIX) + ref_num.gsub!(/[[:punct:]]/, '\\\\\&') + else + ref_num = link_node[:id].delete_prefix(FOOTNOTE_LINK_ID_PREFIX_OLD) + end + + node_xpath = Gitlab::Utils::Nokogiri.css_to_xpath("li[id=#{fn_id(ref_num)}]") footnote_node = doc.at_xpath(node_xpath) - if INTEGER_PATTERN.match?(ref_num) && (footnote_node || modified_footnotes[ref_num]) + if footnote_node || modified_footnotes[ref_num] + next if Feature.disabled?(:use_cmark_renderer) && !INTEGER_PATTERN.match?(ref_num) + link_node[:href] += rand_suffix link_node[:id] += rand_suffix # Sanitization stripped off class - add it back in link_node.parent.append_class('footnote-ref') + link_node['data-footnote-ref'] = nil if Feature.enabled?(:use_cmark_renderer) unless modified_footnotes[ref_num] footnote_node[:id] += rand_suffix @@ -55,6 +78,7 @@ module Banzai if backref_node backref_node[:href] += rand_suffix backref_node.append_class('footnote-backref') + backref_node['data-footnote-backref'] = nil if Feature.enabled?(:use_cmark_renderer) end 
modified_footnotes[ref_num] = true @@ -72,11 +96,13 @@ module Banzai end def fn_id(num) - "#{FOOTNOTE_ID_PREFIX}#{num}" + prefix = Feature.enabled?(:use_cmark_renderer) ? FOOTNOTE_ID_PREFIX : FOOTNOTE_ID_PREFIX_OLD + "#{prefix}#{num}" end def fnref_id(num) - "#{FOOTNOTE_LINK_ID_PREFIX}#{num}" + prefix = Feature.enabled?(:use_cmark_renderer) ? FOOTNOTE_LINK_ID_PREFIX : FOOTNOTE_LINK_ID_PREFIX_OLD + "#{prefix}#{num}" end end end diff --git a/lib/banzai/filter/markdown_engines/common_mark.rb b/lib/banzai/filter/markdown_engines/common_mark.rb index 7be52fc497f..a25ebedf029 100644 --- a/lib/banzai/filter/markdown_engines/common_mark.rb +++ b/lib/banzai/filter/markdown_engines/common_mark.rb @@ -13,8 +13,7 @@ module Banzai EXTENSIONS = [ :autolink, # provides support for automatically converting URLs to anchor tags. :strikethrough, # provides support for strikethroughs. - :table, # provides support for tables. - :tagfilter # strips out several "unsafe" HTML tags from being used: https://github.github.com/gfm/#disallowed-raw-html-extension- + :table # provides support for tables. ].freeze PARSE_OPTIONS = [ @@ -23,36 +22,63 @@ module Banzai :VALIDATE_UTF8 # replace illegal sequences with the replacement character U+FFFD. ].freeze + RENDER_OPTIONS_C = [ + :GITHUB_PRE_LANG, # use GitHub-style <pre lang> for fenced code blocks. + :FOOTNOTES, # render footnotes. + :FULL_INFO_STRING, # include full info strings of code blocks in separate attribute. + :UNSAFE # allow raw/custom HTML and unsafe links. + ].freeze + # The `:GITHUB_PRE_LANG` option is not used intentionally because # it renders a fence block with language as `<pre lang="LANG"><code>some code\n</code></pre>` # while GitLab's syntax is `<pre><code lang="LANG">some code\n</code></pre>`. # If in the future the syntax is about to be made GitHub-compatible, please, add `:GITHUB_PRE_LANG` render option below # and remove `code_block` method from `lib/banzai/renderer/common_mark/html.rb`. 
- RENDER_OPTIONS = [ + RENDER_OPTIONS_RUBY = [ # as of commonmarker 0.18.0, we need to use :UNSAFE to get the same as the original :DEFAULT # https://github.com/gjtorikian/commonmarker/pull/81 - :UNSAFE - ].freeze - - RENDER_OPTIONS_SOURCEPOS = RENDER_OPTIONS + [ - :SOURCEPOS # enable embedding of source position information + :UNSAFE # allow raw/custom HTML and unsafe links. ].freeze def initialize(context) - @context = context - @renderer = Banzai::Renderer::CommonMark::HTML.new(options: render_options) + @context = context + @renderer = Banzai::Renderer::CommonMark::HTML.new(options: render_options) if Feature.disabled?(:use_cmark_renderer) end def render(text) - doc = CommonMarker.render_doc(text, PARSE_OPTIONS, EXTENSIONS) + if Feature.enabled?(:use_cmark_renderer) + CommonMarker.render_html(text, render_options, extensions) + else + doc = CommonMarker.render_doc(text, PARSE_OPTIONS, extensions) - @renderer.render(doc) + @renderer.render(doc) + end end private + def extensions + if Feature.enabled?(:use_cmark_renderer) + EXTENSIONS + else + EXTENSIONS + [ + :tagfilter # strips out several "unsafe" HTML tags from being used: https://github.github.com/gfm/#disallowed-raw-html-extension- + ].freeze + end + end + def render_options - @context[:no_sourcepos] ? RENDER_OPTIONS : RENDER_OPTIONS_SOURCEPOS + @context[:no_sourcepos] ? render_options_no_sourcepos : render_options_sourcepos + end + + def render_options_no_sourcepos + Feature.enabled?(:use_cmark_renderer) ? 
RENDER_OPTIONS_C : RENDER_OPTIONS_RUBY + end + + def render_options_sourcepos + render_options_no_sourcepos + [ + :SOURCEPOS # enable embedding of source position information + ].freeze end end end diff --git a/lib/banzai/filter/markdown_post_escape_filter.rb b/lib/banzai/filter/markdown_post_escape_filter.rb index b69afdcfebe..ccffe1bfbb1 100644 --- a/lib/banzai/filter/markdown_post_escape_filter.rb +++ b/lib/banzai/filter/markdown_post_escape_filter.rb @@ -8,10 +8,8 @@ module Banzai NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze SPAN_REGEX = %r{<span>(.*?)</span>}.freeze - CSS_A = 'a' - XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze - CSS_CODE = 'code' - XPATH_CODE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_CODE).freeze + CSS_A = 'a' + XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze def call return doc unless result[:escaped_literals] @@ -34,12 +32,22 @@ module Banzai node.attributes['title'].value = node.attributes['title'].value.gsub(SPAN_REGEX, '\1') if node.attributes['title'] end - doc.xpath(XPATH_CODE).each do |node| + doc.xpath(lang_tag).each do |node| node.attributes['lang'].value = node.attributes['lang'].value.gsub(SPAN_REGEX, '\1') if node.attributes['lang'] end doc end + + private + + def lang_tag + if Feature.enabled?(:use_cmark_renderer) + Gitlab::Utils::Nokogiri.css_to_xpath('pre') + else + Gitlab::Utils::Nokogiri.css_to_xpath('code') + end + end end end end diff --git a/lib/banzai/filter/plantuml_filter.rb b/lib/banzai/filter/plantuml_filter.rb index 93370178a61..e67cdc7df12 100644 --- a/lib/banzai/filter/plantuml_filter.rb +++ b/lib/banzai/filter/plantuml_filter.rb @@ -5,18 +5,15 @@ require "asciidoctor_plantuml/plantuml" module Banzai module Filter - # HTML that replaces all `code plantuml` tags with PlantUML img tags. + # HTML that replaces all `lang plantuml` tags with PlantUML img tags. 
# class PlantumlFilter < HTML::Pipeline::Filter - CSS = 'pre > code[lang="plantuml"]' - XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze - def call - return doc unless settings.plantuml_enabled? && doc.at_xpath(XPATH) + return doc unless settings.plantuml_enabled? && doc.at_xpath(lang_tag) plantuml_setup - doc.xpath(XPATH).each do |node| + doc.xpath(lang_tag).each do |node| img_tag = Nokogiri::HTML::DocumentFragment.parse( Asciidoctor::PlantUml::Processor.plantuml_content(node.content, {})) node.parent.replace(img_tag) @@ -27,6 +24,15 @@ module Banzai private + def lang_tag + @lang_tag ||= + if Feature.enabled?(:use_cmark_renderer) + Gitlab::Utils::Nokogiri.css_to_xpath('pre[lang="plantuml"] > code').freeze + else + Gitlab::Utils::Nokogiri.css_to_xpath('pre > code[lang="plantuml"]').freeze + end + end + def settings Gitlab::CurrentSettings.current_application_settings end diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index 1e84e7e8af3..7afbc1a1c9c 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -54,8 +54,13 @@ module Banzai return unless node.name == 'a' || node.name == 'li' return unless node.has_attribute?('id') - return if node.name == 'a' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LINK_REFERENCE_PATTERN - return if node.name == 'li' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LI_REFERENCE_PATTERN + if Feature.enabled?(:use_cmark_renderer) + return if node.name == 'a' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LINK_REFERENCE_PATTERN + return if node.name == 'li' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LI_REFERENCE_PATTERN + else + return if node.name == 'a' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LINK_REFERENCE_PATTERN_OLD + return if node.name == 'li' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LI_REFERENCE_PATTERN_OLD + end node.remove_attribute('id') end 
diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb index 8d869cd63d3..66bd86c5bb4 100644 --- a/lib/banzai/filter/syntax_highlight_filter.rb +++ b/lib/banzai/filter/syntax_highlight_filter.rb @@ -11,7 +11,7 @@ module Banzai class SyntaxHighlightFilter < HTML::Pipeline::Filter include OutputSafety - PARAMS_DELIMITER = ':' + LANG_PARAMS_DELIMITER = ':' LANG_PARAMS_ATTR = 'data-lang-params' CSS = 'pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code' @@ -27,7 +27,7 @@ module Banzai def highlight_node(node) css_classes = +'code highlight js-syntax-highlight' - lang, lang_params = parse_lang_params(node.attr('lang')) + lang, lang_params = parse_lang_params(node) sourcepos = node.parent.attr('data-sourcepos') retried = false @@ -56,7 +56,7 @@ module Banzai retry end - sourcepos_attr = sourcepos ? "data-sourcepos=\"#{sourcepos}\"" : "" + sourcepos_attr = sourcepos ? "data-sourcepos=\"#{sourcepos}\"" : '' highlighted = %(<pre #{sourcepos_attr} class="#{css_classes}" lang="#{language}" @@ -69,13 +69,36 @@ module Banzai private - def parse_lang_params(language) + def parse_lang_params(node) + node = node.parent if Feature.enabled?(:use_cmark_renderer) + + # Commonmarker's FULL_INFO_STRING render option works with the space delimiter. + # But the current behavior of GitLab's markdown renderer is different - it grabs everything as the single + # line, including language and its options. To keep backward compatibility, we have to parse the old format and + # merge with the new one. + # + # Behaviors before separating language and its parameters: + # Old ones: + # "```ruby with options```" -> '<pre><code lang="ruby with options">'. + # "```ruby:with:options```" -> '<pre><code lang="ruby:with:options">'. + # + # New ones: + # "```ruby with options```" -> '<pre><code lang="ruby" data-meta="with options">'. + # "```ruby:with:options```" -> '<pre><code lang="ruby:with:options">'. 
+ + language = node.attr('lang') + return unless language - lang, params = language.split(PARAMS_DELIMITER, 2) - formatted_params = %(#{LANG_PARAMS_ATTR}="#{escape_once(params)}") if params + language, language_params = language.split(LANG_PARAMS_DELIMITER, 2) + + if Feature.enabled?(:use_cmark_renderer) + language_params = [node.attr('data-meta'), language_params].compact.join(' ') + end + + formatted_language_params = format_language_params(language_params) - [lang, formatted_params] + [language, formatted_language_params] end # Separate method so it can be instrumented. @@ -95,6 +118,12 @@ module Banzai def use_rouge?(language) (%w(math suggestion) + ::AsciidoctorExtensions::Kroki::SUPPORTED_DIAGRAM_NAMES).exclude?(language) end + + def format_language_params(language_params) + return if language_params.blank? + + %(#{LANG_PARAMS_ATTR}="#{escape_once(language_params)}") + end end end end diff --git a/lib/banzai/renderer.rb b/lib/banzai/renderer.rb index fbbd6135959..b16af78841a 100644 --- a/lib/banzai/renderer.rb +++ b/lib/banzai/renderer.rb @@ -160,16 +160,40 @@ module Banzai def self.cacheless_render(text, context = {}) return text.to_s unless text.present? 
- Gitlab::Metrics.measure(:banzai_cacheless_render) do - result = render_result(text, context) + real_start = Gitlab::Metrics::System.monotonic_time + cpu_start = Gitlab::Metrics::System.cpu_time - output = result[:output] - if output.respond_to?(:to_html) - output.to_html - else - output.to_s - end - end + result = render_result(text, context) + + output = result[:output] + rendered = if output.respond_to?(:to_html) + output.to_html + else + output.to_s + end + + cpu_duration_histogram.observe({}, Gitlab::Metrics::System.cpu_time - cpu_start) + real_duration_histogram.observe({}, Gitlab::Metrics::System.monotonic_time - real_start) + + rendered + end + + def self.real_duration_histogram + Gitlab::Metrics.histogram( + :gitlab_banzai_cacheless_render_real_duration_seconds, + 'Duration of Banzai pipeline rendering in real time', + {}, + [0.01, 0.01, 0.05, 0.1, 0.5, 1, 2, 5, 10.0, 50, 100] + ) + end + + def self.cpu_duration_histogram + Gitlab::Metrics.histogram( + :gitlab_banzai_cacheless_render_cpu_duration_seconds, + 'Duration of Banzai pipeline rendering in cpu time', + {}, + Gitlab::Metrics::EXECUTION_MEASUREMENT_BUCKETS + ) end def self.full_cache_key(cache_key, pipeline_name) diff --git a/lib/banzai/renderer/common_mark/html.rb b/lib/banzai/renderer/common_mark/html.rb index 837665451a1..d9a2d9a9564 100644 --- a/lib/banzai/renderer/common_mark/html.rb +++ b/lib/banzai/renderer/common_mark/html.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +# Remove this entire file when removing `use_cmark_renderer` feature flag and switching to the CMARK html renderer. +# https://gitlab.com/gitlab-org/gitlab/-/issues/345744 module Banzai module Renderer module CommonMark |