Add latest changes from gitlab-org/security/gitlab@13-11-stable-ee

author: GitLab Bot <gitlab-bot@gitlab.com> 2021-05-31 11:40:45 +0000
committer: GitLab Bot <gitlab-bot@gitlab.com> 2021-05-31 11:41:14 +0000
commit: 279809e18f6949adb2543a45c0c800f549d35541 (patch)
tree: aac5347ff15bcdeadf7a00d7866cad76349c0fe4
parent: fcc1904c6fa3d5d71ca7f78470cf4c19ea888e1c (diff)
download: gitlab-ce-279809e18f6949adb2543a45c0c800f549d35541.tar.gz
27 files changed, 235 insertions, 53 deletions
diff --git a/app/helpers/markup_helper.rb b/app/helpers/markup_helper.rb
index ad206d0e5b5..6c19fcc9121 100644
--- a/app/helpers/markup_helper.rb
+++ b/app/helpers/markup_helper.rb
@@ -118,6 +118,7 @@ module MarkupHelper
 
   def markup(file_name, text, context = {})
     context[:project] ||= @project
+    context[:text_source] ||= :blob
     html = context.delete(:rendered) || markup_unsafe(file_name, text, context)
     prepare_for_rendering(html, context)
   end
diff --git a/lib/banzai/filter/absolute_link_filter.rb b/lib/banzai/filter/absolute_link_filter.rb
index a9bdb004c4b..cc7bf3ed556 100644
--- a/lib/banzai/filter/absolute_link_filter.rb
+++ b/lib/banzai/filter/absolute_link_filter.rb
@@ -6,10 +6,13 @@ module Banzai
   module Filter
     # HTML filter that converts relative urls into absolute ones.
     class AbsoluteLinkFilter < HTML::Pipeline::Filter
+      CSS   = 'a.gfm'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
       def call
         return doc unless context[:only_path] == false
 
-        doc.search('a.gfm').each do |el|
+        doc.xpath(XPATH).each do |el|
           process_link_attr el.attribute('href')
         end
 
diff --git a/lib/banzai/filter/ascii_doc_post_processing_filter.rb b/lib/banzai/filter/ascii_doc_post_processing_filter.rb
index 09f0fd7df45..83c729e13b5 100644
--- a/lib/banzai/filter/ascii_doc_post_processing_filter.rb
+++ b/lib/banzai/filter/ascii_doc_post_processing_filter.rb
@@ -3,14 +3,20 @@
 module Banzai
   module Filter
     class AsciiDocPostProcessingFilter < HTML::Pipeline::Filter
+      CSS_MATH   = '[data-math-style]'
+      XPATH_MATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_MATH).freeze
+      CSS_MERM   = '[data-mermaid-style]'
+      XPATH_MERM = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_MERM).freeze
+
       def call
-        doc.search('[data-math-style]').each do |node|
+        doc.xpath(XPATH_MATH).each do |node|
           node.set_attribute('class', 'code math js-render-math')
         end
 
-        doc.search('[data-mermaid-style]').each do |node|
+        doc.xpath(XPATH_MERM).each do |node|
           node.set_attribute('class', 'js-render-mermaid')
         end
+
         doc
       end
     end
diff --git a/lib/banzai/filter/base_relative_link_filter.rb b/lib/banzai/filter/base_relative_link_filter.rb
index fd526df4c48..60d09b69a10 100644
--- a/lib/banzai/filter/base_relative_link_filter.rb
+++ b/lib/banzai/filter/base_relative_link_filter.rb
@@ -7,23 +7,20 @@ module Banzai
     class BaseRelativeLinkFilter < HTML::Pipeline::Filter
       include Gitlab::Utils::StrongMemoize
 
+      CSS   = 'a:not(.gfm), img:not(.gfm), video:not(.gfm), audio:not(.gfm)'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
       protected
 
       def linkable_attributes
         strong_memoize(:linkable_attributes) do
           attrs = []
 
-          attrs += doc.search('a:not(.gfm)').map do |el|
-            el.attribute('href')
-          end
-
-          attrs += doc.search('img:not(.gfm), video:not(.gfm), audio:not(.gfm)').flat_map do |el|
-            [el.attribute('src'), el.attribute('data-src')]
+          attrs += doc.xpath(XPATH).flat_map do |el|
+            [el.attribute('href'), el.attribute('src'), el.attribute('data-src')]
           end
 
-          attrs.reject do |attr|
-            attr.blank? || attr.value.start_with?('//')
-          end
+          attrs.reject { |attr| attr.blank? || attr.value.start_with?('//') }
         end
       end
 
diff --git a/lib/banzai/filter/color_filter.rb b/lib/banzai/filter/color_filter.rb
index 0aca7441638..58e9b8cdba1 100644
--- a/lib/banzai/filter/color_filter.rb
+++ b/lib/banzai/filter/color_filter.rb
@@ -7,8 +7,11 @@ module Banzai
     class ColorFilter < HTML::Pipeline::Filter
       COLOR_CHIP_CLASS = 'gfm-color_chip'
 
+      CSS   = 'code'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
       def call
-        doc.css('code').each do |node|
+        doc.xpath(XPATH).each do |node|
           color = ColorParser.parse(node.content)
           node << color_chip(color) if color
         end
diff --git a/lib/banzai/filter/custom_emoji_filter.rb b/lib/banzai/filter/custom_emoji_filter.rb
index 1ee8f4e31e8..e26c5d36f2b 100644
--- a/lib/banzai/filter/custom_emoji_filter.rb
+++ b/lib/banzai/filter/custom_emoji_filter.rb
@@ -9,7 +9,7 @@ module Banzai
         return doc unless context[:project]
         return doc unless Feature.enabled?(:custom_emoji, context[:project])
 
-        doc.search(".//text()").each do |node|
+        doc.xpath('descendant-or-self::text()').each do |node|
           content = node.to_html
 
           next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS)
diff --git a/lib/banzai/filter/emoji_filter.rb b/lib/banzai/filter/emoji_filter.rb
index 8952a3ff6b4..9d24bf028b6 100644
--- a/lib/banzai/filter/emoji_filter.rb
+++ b/lib/banzai/filter/emoji_filter.rb
@@ -11,7 +11,7 @@ module Banzai
       IGNORE_UNICODE_EMOJIS = %w(™ © ®).freeze
 
       def call
-        doc.search(".//text()").each do |node|
+        doc.xpath('descendant-or-self::text()').each do |node|
           content = node.to_html
           next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS)
 
diff --git a/lib/banzai/filter/footnote_filter.rb b/lib/banzai/filter/footnote_filter.rb
index 5474242e03c..0f856dc0eb9 100644
--- a/lib/banzai/filter/footnote_filter.rb
+++ b/lib/banzai/filter/footnote_filter.rb
@@ -23,17 +23,23 @@ module Banzai
       FOOTNOTE_LINK_REFERENCE_PATTERN = /\A#{FOOTNOTE_LINK_ID_PREFIX}\d+\z/.freeze
       FOOTNOTE_START_NUMBER           = 1
 
+      CSS_SECTION    = "ol > li[id=#{FOOTNOTE_ID_PREFIX}#{FOOTNOTE_START_NUMBER}]"
+      XPATH_SECTION  = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_SECTION).freeze
+      CSS_FOOTNOTE   = 'sup > a[id]'
+      XPATH_FOOTNOTE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_FOOTNOTE).freeze
+
       def call
-        return doc unless first_footnote = doc.at_css("ol > li[id=#{fn_id(FOOTNOTE_START_NUMBER)}]")
+        return doc unless first_footnote = doc.at_xpath(XPATH_SECTION)
 
         # Sanitization stripped off the section wrapper - add it back in
         first_footnote.parent.wrap('<section class="footnotes">')
         rand_suffix = "-#{random_number}"
         modified_footnotes = {}
 
-        doc.css('sup > a[id]').each do |link_node|
+        doc.xpath(XPATH_FOOTNOTE).each do |link_node|
           ref_num       = link_node[:id].delete_prefix(FOOTNOTE_LINK_ID_PREFIX)
-          footnote_node = doc.at_css("li[id=#{fn_id(ref_num)}]")
+          node_xpath    = Gitlab::Utils::Nokogiri.css_to_xpath("li[id=#{fn_id(ref_num)}]")
+          footnote_node = doc.at_xpath(node_xpath)
 
           if INTEGER_PATTERN.match?(ref_num) && (footnote_node || modified_footnotes[ref_num])
             link_node[:href] += rand_suffix
diff --git a/lib/banzai/filter/gollum_tags_filter.rb b/lib/banzai/filter/gollum_tags_filter.rb
index 6de9f2b86f6..0548d5a9997 100644
--- a/lib/banzai/filter/gollum_tags_filter.rb
+++ b/lib/banzai/filter/gollum_tags_filter.rb
@@ -60,7 +60,7 @@ module Banzai
       IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set
 
       def call
-        doc.search(".//text()").each do |node|
+        doc.xpath('descendant-or-self::text()').each do |node|
           next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS)
           next unless node.content =~ TAGS_PATTERN
 
diff --git a/lib/banzai/filter/image_lazy_load_filter.rb b/lib/banzai/filter/image_lazy_load_filter.rb
index d8b9eb29cf5..916c135b777 100644
--- a/lib/banzai/filter/image_lazy_load_filter.rb
+++ b/lib/banzai/filter/image_lazy_load_filter.rb
@@ -6,8 +6,11 @@ module Banzai
     # HTML filter that moves the value of image `src` attributes to `data-src`
     # so they can be lazy loaded.
     class ImageLazyLoadFilter < HTML::Pipeline::Filter
+      CSS   = 'img'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
       def call
-        doc.xpath('descendant-or-self::img').each do |img|
+        doc.xpath(XPATH).each do |img|
           img.add_class('lazy')
           img['data-src'] = img['src']
           img['src'] = LazyImageTagHelper.placeholder_image
diff --git a/lib/banzai/filter/inline_diff_filter.rb b/lib/banzai/filter/inline_diff_filter.rb
index 5a1c0bee32d..e47ff15e7b7 100644
--- a/lib/banzai/filter/inline_diff_filter.rb
+++ b/lib/banzai/filter/inline_diff_filter.rb
@@ -7,7 +7,7 @@ module Banzai
       IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set
 
       def call
-        doc.search(".//text()").each do |node|
+        doc.xpath('descendant-or-self::text()').each do |node|
           next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS)
 
           content = node.to_html
diff --git a/lib/banzai/filter/inline_metrics_redactor_filter.rb b/lib/banzai/filter/inline_metrics_redactor_filter.rb
index 2259115acfc..b256815ae84 100644
--- a/lib/banzai/filter/inline_metrics_redactor_filter.rb
+++ b/lib/banzai/filter/inline_metrics_redactor_filter.rb
@@ -8,6 +8,7 @@ module Banzai
       include Gitlab::Utils::StrongMemoize
 
       METRICS_CSS_CLASS = '.js-render-metrics'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(METRICS_CSS_CLASS).freeze
       EMBED_LIMIT = 100
 
       Route = Struct.new(:regex, :permission)
@@ -41,7 +42,7 @@ module Banzai
       # @return [Nokogiri::XML::NodeSet]
       def nodes
         strong_memoize(:nodes) do
-          nodes = doc.css(METRICS_CSS_CLASS)
+          nodes = doc.xpath(XPATH)
           nodes.drop(EMBED_LIMIT).each(&:remove)
 
           nodes
diff --git a/lib/banzai/filter/kroki_filter.rb b/lib/banzai/filter/kroki_filter.rb
index dbd4de32a47..3803302c324 100644
--- a/lib/banzai/filter/kroki_filter.rb
+++ b/lib/banzai/filter/kroki_filter.rb
@@ -15,10 +15,11 @@ module Banzai
                                 .map { |diagram_type| %(pre[lang="#{diagram_type}"] > code) }
                                 .join(', ')
 
-        return doc unless doc.at(diagram_selectors)
+        xpath = Gitlab::Utils::Nokogiri.css_to_xpath(diagram_selectors)
+        return doc unless doc.at_xpath(xpath)
 
         diagram_format = "svg"
-        doc.css(diagram_selectors).each do |node|
+        doc.xpath(xpath).each do |node|
           diagram_type = node.parent['lang']
           img_tag = Nokogiri::HTML::DocumentFragment.parse(%(<img src="#{create_image_src(diagram_type, diagram_format, node.content)}"/>))
           node.parent.replace(img_tag)
diff --git a/lib/banzai/filter/markdown_post_escape_filter.rb b/lib/banzai/filter/markdown_post_escape_filter.rb
index ad32e9afbf5..b69afdcfebe 100644
--- a/lib/banzai/filter/markdown_post_escape_filter.rb
+++ b/lib/banzai/filter/markdown_post_escape_filter.rb
@@ -8,6 +8,11 @@ module Banzai
       NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze
       SPAN_REGEX        = %r{<span>(.*?)</span>}.freeze
 
+      CSS_A      = 'a'
+      XPATH_A    = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze
+      CSS_CODE   = 'code'
+      XPATH_CODE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_CODE).freeze
+
       def call
         return doc unless result[:escaped_literals]
 
@@ -24,12 +29,12 @@ module Banzai
         # Banzai::Renderer::CommonMark::HTML.  However, we eventually want to use
         # the built-in compiled renderer, rather than the ruby version, for speed.
         # So let's do this work here.
-        doc.css('a').each do |node|
+        doc.xpath(XPATH_A).each do |node|
           node.attributes['href'].value  = node.attributes['href'].value.gsub(SPAN_REGEX, '\1') if node.attributes['href']
           node.attributes['title'].value = node.attributes['title'].value.gsub(SPAN_REGEX, '\1') if node.attributes['title']
         end
 
-        doc.css('code').each do |node|
+        doc.xpath(XPATH_CODE).each do |node|
           node.attributes['lang'].value  = node.attributes['lang'].value.gsub(SPAN_REGEX, '\1') if node.attributes['lang']
         end
 
diff --git a/lib/banzai/filter/math_filter.rb b/lib/banzai/filter/math_filter.rb
index 2247984b86d..53dafe45fb3 100644
--- a/lib/banzai/filter/math_filter.rb
+++ b/lib/banzai/filter/math_filter.rb
@@ -10,6 +10,11 @@ module Banzai
     # HTML filter that adds class="code math" and removes the dollar sign in $`2+2`$.
     #
     class MathFilter < HTML::Pipeline::Filter
+      CSS_MATH   = 'pre.code.language-math'
+      XPATH_MATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_MATH).freeze
+      CSS_CODE   = 'code'
+      XPATH_CODE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_CODE).freeze
+
       # Attribute indicating inline or display math.
       STYLE_ATTRIBUTE = 'data-math-style'
 
@@ -21,7 +26,7 @@ module Banzai
       DOLLAR_SIGN = '$'
 
       def call
-        doc.css('code').each do |code|
+        doc.xpath(XPATH_CODE).each do |code|
           closing = code.next
           opening = code.previous
 
@@ -39,7 +44,7 @@ module Banzai
           end
         end
 
-        doc.css('pre.code.language-math').each do |el|
+        doc.xpath(XPATH_MATH).each do |el|
           el[STYLE_ATTRIBUTE] = 'display'
           el[:class] += " #{TAG_CLASS}"
         end
diff --git a/lib/banzai/filter/mermaid_filter.rb b/lib/banzai/filter/mermaid_filter.rb
index f0adb83af8a..aaaf851ccf0 100644
--- a/lib/banzai/filter/mermaid_filter.rb
+++ b/lib/banzai/filter/mermaid_filter.rb
@@ -4,8 +4,11 @@
 module Banzai
   module Filter
     class MermaidFilter < HTML::Pipeline::Filter
+      CSS   = 'pre[lang="mermaid"] > code'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
       def call
-        doc.css('pre[lang="mermaid"] > code').add_class('js-render-mermaid')
+        doc.xpath(XPATH).add_class('js-render-mermaid')
 
         doc
       end
diff --git a/lib/banzai/filter/plantuml_filter.rb b/lib/banzai/filter/plantuml_filter.rb
index 37d4126c1ba..93370178a61 100644
--- a/lib/banzai/filter/plantuml_filter.rb
+++ b/lib/banzai/filter/plantuml_filter.rb
@@ -8,12 +8,15 @@ module Banzai
     # HTML that replaces all `code plantuml` tags with PlantUML img tags.
     #
     class PlantumlFilter < HTML::Pipeline::Filter
+      CSS   = 'pre > code[lang="plantuml"]'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
       def call
-        return doc unless settings.plantuml_enabled? && doc.at('pre > code[lang="plantuml"]')
+        return doc unless settings.plantuml_enabled? && doc.at_xpath(XPATH)
 
         plantuml_setup
 
-        doc.css('pre > code[lang="plantuml"]').each do |node|
+        doc.xpath(XPATH).each do |node|
           img_tag = Nokogiri::HTML::DocumentFragment.parse(
             Asciidoctor::PlantUml::Processor.plantuml_content(node.content, {}))
           node.parent.replace(img_tag)
diff --git a/lib/banzai/filter/suggestion_filter.rb b/lib/banzai/filter/suggestion_filter.rb
index 56a14ec0737..aa1fcb1021c 100644
--- a/lib/banzai/filter/suggestion_filter.rb
+++ b/lib/banzai/filter/suggestion_filter.rb
@@ -7,10 +7,13 @@ module Banzai
       # Class used for tagging elements that should be rendered
       TAG_CLASS = 'js-render-suggestion'
 
+      CSS   = 'pre.language-suggestion > code'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
       def call
         return doc unless suggestions_filter_enabled?
 
-        doc.search('pre.language-suggestion > code').each do |node|
+        doc.xpath(XPATH).each do |node|
           node.add_class(TAG_CLASS)
         end
 
diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb
index 731a2bb4c77..eceeb113555 100644
--- a/lib/banzai/filter/syntax_highlight_filter.rb
+++ b/lib/banzai/filter/syntax_highlight_filter.rb
@@ -14,8 +14,11 @@ module Banzai
       PARAMS_DELIMITER = ':'
       LANG_PARAMS_ATTR = 'data-lang-params'
 
+      CSS   = 'pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
       def call
-        doc.search('pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code').each do |node|
+        doc.xpath(XPATH).each do |node|
           highlight_node(node)
         end
 
diff --git a/lib/banzai/filter/table_of_contents_filter.rb b/lib/banzai/filter/table_of_contents_filter.rb
index b362607aed2..13ca9cde567 100644
--- a/lib/banzai/filter/table_of_contents_filter.rb
+++ b/lib/banzai/filter/table_of_contents_filter.rb
@@ -19,6 +19,9 @@ module Banzai
     class TableOfContentsFilter < HTML::Pipeline::Filter
       include Gitlab::Utils::Markdown
 
+      CSS   = 'h1, h2, h3, h4, h5, h6'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
       def call
         return doc if context[:no_header_anchors]
 
@@ -27,7 +30,7 @@ module Banzai
         headers = Hash.new(0)
         header_root = current_header = HeaderNode.new
 
-        doc.css('h1, h2, h3, h4, h5, h6').each do |node|
+        doc.xpath(XPATH).each do |node|
           if header_content = node.children.first
             id = string_to_anchor(node.text)
 
diff --git a/lib/banzai/filter/truncate_source_filter.rb b/lib/banzai/filter/truncate_source_filter.rb
index 44f88b253d9..a21d4a44295 100644
--- a/lib/banzai/filter/truncate_source_filter.rb
+++ b/lib/banzai/filter/truncate_source_filter.rb
@@ -3,12 +3,29 @@
 module Banzai
   module Filter
     class TruncateSourceFilter < HTML::Pipeline::TextFilter
+      CHARACTER_COUNT_LIMIT = 1.megabyte
+      USER_MSG_LIMIT = 10_000
+
       def call
-        return text unless context.key?(:limit)
+        # don't truncate if it's a :blob and no limit is set
+        return text if context[:text_source] == :blob && !context.key?(:limit)
+
+        limit = context[:limit] || CHARACTER_COUNT_LIMIT
+
+        # no sense in allowing `truncate_bytes` to duplicate a large
+        # string unless it actually exceeds the limit
+        return text if text.bytesize <= limit
 
         # Use three dots instead of the ellipsis Unicode character because
         # some clients show the raw Unicode value in the merge commit.
-        text.truncate_bytes(context[:limit], omission: '...')
+        trunc = text.truncate_bytes(limit, omission: '...')
+
+        # allows us to indicate to the user that what they see is a truncated copy
+        if limit > USER_MSG_LIMIT
+          trunc.prepend("_The text is longer than #{limit} characters and has been visually truncated._\n\n")
+        end
+
+        trunc
       end
     end
   end
diff --git a/lib/banzai/filter/wiki_link_filter.rb b/lib/banzai/filter/wiki_link_filter.rb
index 44f13612fde..2b95d87ff8e 100644
--- a/lib/banzai/filter/wiki_link_filter.rb
+++ b/lib/banzai/filter/wiki_link_filter.rb
@@ -10,14 +10,21 @@ module Banzai
     class WikiLinkFilter < HTML::Pipeline::Filter
       include Gitlab::Utils::SanitizeNodeLink
 
+      CSS_A     = 'a:not(.gfm)'
+      XPATH_A   = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze
+      CSS_VA    = 'video, audio'
+      XPATH_VA  = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_VA).freeze
+      CSS_IMG   = 'img'
+      XPATH_IMG = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_IMG).freeze
+
       def call
         return doc unless wiki?
 
-        doc.search('a:not(.gfm)').each { |el| process_link(el.attribute('href'), el) }
+        doc.xpath(XPATH_A).each { |el| process_link(el.attribute('href'), el) }
 
-        doc.search('video, audio').each { |el| process_link(el.attribute('src'), el) }
+        doc.xpath(XPATH_VA).each { |el| process_link(el.attribute('src'), el) }
 
-        doc.search('img').each do |el|
+        doc.xpath(XPATH_IMG).each do |el|
           attr = el.attribute('data-src') || el.attribute('src')
 
           process_link(attr, el)
diff --git a/lib/gitlab/diff/suggestions_parser.rb b/lib/gitlab/diff/suggestions_parser.rb
index f3e6fc455ac..6f126147113 100644
--- a/lib/gitlab/diff/suggestions_parser.rb
+++ b/lib/gitlab/diff/suggestions_parser.rb
@@ -6,6 +6,9 @@ module Gitlab
       # Matches for instance "-1", "+1" or "-1+2".
       SUGGESTION_CONTEXT = /^(\-(?<above>\d+))?(\+(?<below>\d+))?$/.freeze
 
+      CSS   = 'pre.language-suggestion'
+      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
       class << self
         # Returns an array of Gitlab::Diff::Suggestion which represents each
         # suggestion in the given text.
@@ -17,7 +20,7 @@ module Gitlab
                                      no_original_data: true,
                                      suggestions_filter_enabled: supports_suggestion)
           doc = Nokogiri::HTML(html)
-          suggestion_nodes = doc.search('pre.language-suggestion')
+          suggestion_nodes = doc.xpath(XPATH)
 
           return [] if suggestion_nodes.empty?
 
diff --git a/lib/gitlab/utils/nokogiri.rb b/lib/gitlab/utils/nokogiri.rb
new file mode 100644
index 00000000000..4b37bb7e5ea
--- /dev/null
+++ b/lib/gitlab/utils/nokogiri.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Utils
+    class Nokogiri
+      class << self
+        # Use Nokogiri to convert a css selector into an xpath selector.
+        # Nokogiri can use css selectors with `doc.search()`.  However
+        # for large node trees, it is _much_ slower than using xpath,
+        # by several orders of magnitude.
+        # https://gitlab.com/gitlab-org/gitlab/-/issues/329186
+        def css_to_xpath(css)
+          xpath = ::Nokogiri::CSS.xpath_for(css)
+
+          # Due to https://github.com/sparklemotion/nokogiri/issues/572,
+          # we remove the leading `//` and add `descendant-or-self::`
+          # in order to ensure we're searching from this node and all
+          # descendants.
+          xpath.map { |t| "descendant-or-self::#{t[2..-1]}" }.join('|')
+        end
+      end
+    end
+  end
+end
diff --git a/spec/helpers/markup_helper_spec.rb b/spec/helpers/markup_helper_spec.rb
index 00a59f037e0..e946857ac77 100644
--- a/spec/helpers/markup_helper_spec.rb
+++ b/spec/helpers/markup_helper_spec.rb
@@ -418,6 +418,13 @@ FooBar
   describe '#markup' do
     let(:content) { 'Noël' }
 
+    it 'sets the :text_source to :blob in the context' do
+      context = {}
+      helper.markup('foo.md', content, context)
+
+      expect(context).to include(text_source: :blob)
+    end
+
     it 'preserves encoding' do
       expect(content.encoding.name).to eq('UTF-8')
       expect(helper.markup('foo.rst', content).encoding.name).to eq('UTF-8')
diff --git a/spec/lib/banzai/filter/truncate_source_filter_spec.rb b/spec/lib/banzai/filter/truncate_source_filter_spec.rb
index d5eb8b738b1..8970aa1d382 100644
--- a/spec/lib/banzai/filter/truncate_source_filter_spec.rb
+++ b/spec/lib/banzai/filter/truncate_source_filter_spec.rb
@@ -8,24 +8,68 @@ RSpec.describe Banzai::Filter::TruncateSourceFilter do
   let(:short_text) { 'foo' * 10 }
   let(:long_text) { ([short_text] * 10).join(' ') }
 
-  it 'does nothing when limit is unspecified' do
-    output = filter(long_text)
-
-    expect(output).to eq(long_text)
+  before do
+    stub_const("#{described_class}::CHARACTER_COUNT_LIMIT", 50)
+    stub_const("#{described_class}::USER_MSG_LIMIT", 20)
   end
 
-  it 'does nothing to a short-enough text' do
-    output = filter(short_text, limit: short_text.bytesize)
+  context 'when markdown belongs to a blob' do
+    it 'does nothing when limit is unspecified' do
+      output = filter(long_text, text_source: :blob)
+
+      expect(output).to eq(long_text)
+    end
+
+    it 'truncates normally when limit specified' do
+      truncated = 'foofoof...'
+
+      output = filter(long_text, text_source: :blob, limit: 10)
 
-    expect(output).to eq(short_text)
+      expect(output).to eq(truncated)
+    end
   end
 
-  it 'truncates UTF-8 text by bytes, on a character boundary' do
-    utf8_text = '日本語の文字が大きい'
-    truncated = '日...'
+  context 'when markdown belongs to a field (non-blob)' do
+    it 'does nothing when limit is greater' do
+      output = filter(long_text, limit: 1.megabyte)
+
+      expect(output).to eq(long_text)
+    end
+
+    it 'truncates to the default when limit is unspecified' do
+      stub_const("#{described_class}::USER_MSG_LIMIT", 200)
+      truncated = 'foofoofoofoofoofoofoofoofoofoo foofoofoofoofoof...'
+
+      output = filter(long_text)
+
+      expect(output).to eq(truncated)
+    end
+
+    it 'prepends the user message' do
+      truncated = <<~TEXT
+        _The text is longer than 50 characters and has been visually truncated._
+
+        foofoofoofoofoofoofoofoofoofoo foofoofoofoofoof...
+      TEXT
+
+      output = filter(long_text)
+
+      expect(output).to eq(truncated.strip)
+    end
+
+    it 'does nothing to a short-enough text' do
+      output = filter(short_text, limit: short_text.bytesize)
+
+      expect(output).to eq(short_text)
+    end
+
+    it 'truncates UTF-8 text by bytes, on a character boundary' do
+      utf8_text = '日本語の文字が大きい'
+      truncated = '日...'
 
-    expect(filter(utf8_text, limit: truncated.bytesize)).to eq(truncated)
-    expect(filter(utf8_text, limit: utf8_text.bytesize)).to eq(utf8_text)
-    expect(filter(utf8_text, limit: utf8_text.mb_chars.size)).not_to eq(utf8_text)
+      expect(filter(utf8_text, limit: truncated.bytesize)).to eq(truncated)
+      expect(filter(utf8_text, limit: utf8_text.bytesize)).to eq(utf8_text)
+      expect(filter(utf8_text, limit: utf8_text.mb_chars.size)).not_to eq(utf8_text)
+    end
   end
 end
diff --git a/spec/lib/gitlab/utils/nokogiri_spec.rb b/spec/lib/gitlab/utils/nokogiri_spec.rb
new file mode 100644
index 00000000000..90f137f53c8
--- /dev/null
+++ b/spec/lib/gitlab/utils/nokogiri_spec.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Utils::Nokogiri do
+  describe '#css_to_xpath' do
+    using RSpec::Parameterized::TableSyntax
+
+    where(:css, :xpath) do
+      'img'                               | "descendant-or-self::img"
+      'a.gfm'                             | "descendant-or-self::a[contains(concat(' ',normalize-space(@class),' '),' gfm ')]"
+      'a:not(.gfm)'                       | "descendant-or-self::a[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]"
+      'video, audio'                      | "descendant-or-self::video|descendant-or-self::audio"
+      '[data-math-style]'                 | "descendant-or-self::*[@data-math-style]"
+      '[data-mermaid-style]'              | "descendant-or-self::*[@data-mermaid-style]"
+      '.js-render-metrics'                | "descendant-or-self::*[contains(concat(' ',normalize-space(@class),' '),' js-render-metrics ')]"
+      'h1, h2, h3, h4, h5, h6'            | "descendant-or-self::h1|descendant-or-self::h2|descendant-or-self::h3|descendant-or-self::h4|descendant-or-self::h5|descendant-or-self::h6"
+      'pre.code.language-math'            | "descendant-or-self::pre[contains(concat(' ',normalize-space(@class),' '),' code ') and contains(concat(' ',normalize-space(@class),' '),' language-math ')]"
+      'pre > code[lang="plantuml"]'       | "descendant-or-self::pre/code[@lang=\"plantuml\"]"
+      'pre[lang="mermaid"] > code'        | "descendant-or-self::pre[@lang=\"mermaid\"]/code"
+      'pre.language-suggestion'           | "descendant-or-self::pre[contains(concat(' ',normalize-space(@class),' '),' language-suggestion ')]"
+      'pre.language-suggestion > code'    | "descendant-or-self::pre[contains(concat(' ',normalize-space(@class),' '),' language-suggestion ')]/code"
+      'a.gfm[data-reference-type="user"]' | "descendant-or-self::a[contains(concat(' ',normalize-space(@class),' '),' gfm ') and @data-reference-type=\"user\"]"
+      'a:not(.gfm), img:not(.gfm), video:not(.gfm), audio:not(.gfm)'                        | "descendant-or-self::a[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]|descendant-or-self::img[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]|descendant-or-self::video[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]|descendant-or-self::audio[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]"
+      'pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code' | "descendant-or-self::pre[not(@data-math-style) and not(@data-mermaid-style) and not(@data-kroki-style)]/code"
+    end
+
+    with_them do
+      it 'generates the xpath' do
+        expect(described_class.css_to_xpath(css)).to eq xpath
+      end
+    end
+  end
+end
author	GitLab Bot <gitlab-bot@gitlab.com>	2021-05-31 11:40:45 +0000
committer	GitLab Bot <gitlab-bot@gitlab.com>	2021-05-31 11:41:14 +0000
commit	279809e18f6949adb2543a45c0c800f549d35541 (patch)
tree	aac5347ff15bcdeadf7a00d7866cad76349c0fe4
parent	fcc1904c6fa3d5d71ca7f78470cf4c19ea888e1c (diff)
download	gitlab-ce-279809e18f6949adb2543a45c0c800f549d35541.tar.gz