summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2021-05-31 11:40:45 +0000
committerGitLab Bot <gitlab-bot@gitlab.com>2021-05-31 11:41:14 +0000
commit279809e18f6949adb2543a45c0c800f549d35541 (patch)
treeaac5347ff15bcdeadf7a00d7866cad76349c0fe4
parentfcc1904c6fa3d5d71ca7f78470cf4c19ea888e1c (diff)
downloadgitlab-ce-279809e18f6949adb2543a45c0c800f549d35541.tar.gz
Add latest changes from gitlab-org/security/gitlab@13-11-stable-ee
-rw-r--r--app/helpers/markup_helper.rb1
-rw-r--r--lib/banzai/filter/absolute_link_filter.rb5
-rw-r--r--lib/banzai/filter/ascii_doc_post_processing_filter.rb10
-rw-r--r--lib/banzai/filter/base_relative_link_filter.rb15
-rw-r--r--lib/banzai/filter/color_filter.rb5
-rw-r--r--lib/banzai/filter/custom_emoji_filter.rb2
-rw-r--r--lib/banzai/filter/emoji_filter.rb2
-rw-r--r--lib/banzai/filter/footnote_filter.rb12
-rw-r--r--lib/banzai/filter/gollum_tags_filter.rb2
-rw-r--r--lib/banzai/filter/image_lazy_load_filter.rb5
-rw-r--r--lib/banzai/filter/inline_diff_filter.rb2
-rw-r--r--lib/banzai/filter/inline_metrics_redactor_filter.rb3
-rw-r--r--lib/banzai/filter/kroki_filter.rb5
-rw-r--r--lib/banzai/filter/markdown_post_escape_filter.rb9
-rw-r--r--lib/banzai/filter/math_filter.rb9
-rw-r--r--lib/banzai/filter/mermaid_filter.rb5
-rw-r--r--lib/banzai/filter/plantuml_filter.rb7
-rw-r--r--lib/banzai/filter/suggestion_filter.rb5
-rw-r--r--lib/banzai/filter/syntax_highlight_filter.rb5
-rw-r--r--lib/banzai/filter/table_of_contents_filter.rb5
-rw-r--r--lib/banzai/filter/truncate_source_filter.rb21
-rw-r--r--lib/banzai/filter/wiki_link_filter.rb13
-rw-r--r--lib/gitlab/diff/suggestions_parser.rb5
-rw-r--r--lib/gitlab/utils/nokogiri.rb24
-rw-r--r--spec/helpers/markup_helper_spec.rb7
-rw-r--r--spec/lib/banzai/filter/truncate_source_filter_spec.rb70
-rw-r--r--spec/lib/gitlab/utils/nokogiri_spec.rb34
27 files changed, 235 insertions, 53 deletions
diff --git a/app/helpers/markup_helper.rb b/app/helpers/markup_helper.rb
index ad206d0e5b5..6c19fcc9121 100644
--- a/app/helpers/markup_helper.rb
+++ b/app/helpers/markup_helper.rb
@@ -118,6 +118,7 @@ module MarkupHelper
def markup(file_name, text, context = {})
context[:project] ||= @project
+ context[:text_source] ||= :blob
html = context.delete(:rendered) || markup_unsafe(file_name, text, context)
prepare_for_rendering(html, context)
end
diff --git a/lib/banzai/filter/absolute_link_filter.rb b/lib/banzai/filter/absolute_link_filter.rb
index a9bdb004c4b..cc7bf3ed556 100644
--- a/lib/banzai/filter/absolute_link_filter.rb
+++ b/lib/banzai/filter/absolute_link_filter.rb
@@ -6,10 +6,13 @@ module Banzai
module Filter
# HTML filter that converts relative urls into absolute ones.
class AbsoluteLinkFilter < HTML::Pipeline::Filter
+ CSS = 'a.gfm'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
def call
return doc unless context[:only_path] == false
- doc.search('a.gfm').each do |el|
+ doc.xpath(XPATH).each do |el|
process_link_attr el.attribute('href')
end
diff --git a/lib/banzai/filter/ascii_doc_post_processing_filter.rb b/lib/banzai/filter/ascii_doc_post_processing_filter.rb
index 09f0fd7df45..83c729e13b5 100644
--- a/lib/banzai/filter/ascii_doc_post_processing_filter.rb
+++ b/lib/banzai/filter/ascii_doc_post_processing_filter.rb
@@ -3,14 +3,20 @@
module Banzai
module Filter
class AsciiDocPostProcessingFilter < HTML::Pipeline::Filter
+ CSS_MATH = '[data-math-style]'
+ XPATH_MATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_MATH).freeze
+ CSS_MERM = '[data-mermaid-style]'
+ XPATH_MERM = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_MERM).freeze
+
def call
- doc.search('[data-math-style]').each do |node|
+ doc.xpath(XPATH_MATH).each do |node|
node.set_attribute('class', 'code math js-render-math')
end
- doc.search('[data-mermaid-style]').each do |node|
+ doc.xpath(XPATH_MERM).each do |node|
node.set_attribute('class', 'js-render-mermaid')
end
+
doc
end
end
diff --git a/lib/banzai/filter/base_relative_link_filter.rb b/lib/banzai/filter/base_relative_link_filter.rb
index fd526df4c48..60d09b69a10 100644
--- a/lib/banzai/filter/base_relative_link_filter.rb
+++ b/lib/banzai/filter/base_relative_link_filter.rb
@@ -7,23 +7,20 @@ module Banzai
class BaseRelativeLinkFilter < HTML::Pipeline::Filter
include Gitlab::Utils::StrongMemoize
+ CSS = 'a:not(.gfm), img:not(.gfm), video:not(.gfm), audio:not(.gfm)'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
protected
def linkable_attributes
strong_memoize(:linkable_attributes) do
attrs = []
- attrs += doc.search('a:not(.gfm)').map do |el|
- el.attribute('href')
- end
-
- attrs += doc.search('img:not(.gfm), video:not(.gfm), audio:not(.gfm)').flat_map do |el|
- [el.attribute('src'), el.attribute('data-src')]
+ attrs += doc.xpath(XPATH).flat_map do |el|
+ [el.attribute('href'), el.attribute('src'), el.attribute('data-src')]
end
- attrs.reject do |attr|
- attr.blank? || attr.value.start_with?('//')
- end
+ attrs.reject { |attr| attr.blank? || attr.value.start_with?('//') }
end
end
diff --git a/lib/banzai/filter/color_filter.rb b/lib/banzai/filter/color_filter.rb
index 0aca7441638..58e9b8cdba1 100644
--- a/lib/banzai/filter/color_filter.rb
+++ b/lib/banzai/filter/color_filter.rb
@@ -7,8 +7,11 @@ module Banzai
class ColorFilter < HTML::Pipeline::Filter
COLOR_CHIP_CLASS = 'gfm-color_chip'
+ CSS = 'code'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
def call
- doc.css('code').each do |node|
+ doc.xpath(XPATH).each do |node|
color = ColorParser.parse(node.content)
node << color_chip(color) if color
end
diff --git a/lib/banzai/filter/custom_emoji_filter.rb b/lib/banzai/filter/custom_emoji_filter.rb
index 1ee8f4e31e8..e26c5d36f2b 100644
--- a/lib/banzai/filter/custom_emoji_filter.rb
+++ b/lib/banzai/filter/custom_emoji_filter.rb
@@ -9,7 +9,7 @@ module Banzai
return doc unless context[:project]
return doc unless Feature.enabled?(:custom_emoji, context[:project])
- doc.search(".//text()").each do |node|
+ doc.xpath('descendant-or-self::text()').each do |node|
content = node.to_html
next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS)
diff --git a/lib/banzai/filter/emoji_filter.rb b/lib/banzai/filter/emoji_filter.rb
index 8952a3ff6b4..9d24bf028b6 100644
--- a/lib/banzai/filter/emoji_filter.rb
+++ b/lib/banzai/filter/emoji_filter.rb
@@ -11,7 +11,7 @@ module Banzai
IGNORE_UNICODE_EMOJIS = %w(™ © ®).freeze
def call
- doc.search(".//text()").each do |node|
+ doc.xpath('descendant-or-self::text()').each do |node|
content = node.to_html
next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS)
diff --git a/lib/banzai/filter/footnote_filter.rb b/lib/banzai/filter/footnote_filter.rb
index 5474242e03c..0f856dc0eb9 100644
--- a/lib/banzai/filter/footnote_filter.rb
+++ b/lib/banzai/filter/footnote_filter.rb
@@ -23,17 +23,23 @@ module Banzai
FOOTNOTE_LINK_REFERENCE_PATTERN = /\A#{FOOTNOTE_LINK_ID_PREFIX}\d+\z/.freeze
FOOTNOTE_START_NUMBER = 1
+ CSS_SECTION = "ol > li[id=#{FOOTNOTE_ID_PREFIX}#{FOOTNOTE_START_NUMBER}]"
+ XPATH_SECTION = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_SECTION).freeze
+ CSS_FOOTNOTE = 'sup > a[id]'
+ XPATH_FOOTNOTE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_FOOTNOTE).freeze
+
def call
- return doc unless first_footnote = doc.at_css("ol > li[id=#{fn_id(FOOTNOTE_START_NUMBER)}]")
+ return doc unless first_footnote = doc.at_xpath(XPATH_SECTION)
# Sanitization stripped off the section wrapper - add it back in
first_footnote.parent.wrap('<section class="footnotes">')
rand_suffix = "-#{random_number}"
modified_footnotes = {}
- doc.css('sup > a[id]').each do |link_node|
+ doc.xpath(XPATH_FOOTNOTE).each do |link_node|
ref_num = link_node[:id].delete_prefix(FOOTNOTE_LINK_ID_PREFIX)
- footnote_node = doc.at_css("li[id=#{fn_id(ref_num)}]")
+ node_xpath = Gitlab::Utils::Nokogiri.css_to_xpath("li[id=#{fn_id(ref_num)}]")
+ footnote_node = doc.at_xpath(node_xpath)
if INTEGER_PATTERN.match?(ref_num) && (footnote_node || modified_footnotes[ref_num])
link_node[:href] += rand_suffix
diff --git a/lib/banzai/filter/gollum_tags_filter.rb b/lib/banzai/filter/gollum_tags_filter.rb
index 6de9f2b86f6..0548d5a9997 100644
--- a/lib/banzai/filter/gollum_tags_filter.rb
+++ b/lib/banzai/filter/gollum_tags_filter.rb
@@ -60,7 +60,7 @@ module Banzai
IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set
def call
- doc.search(".//text()").each do |node|
+ doc.xpath('descendant-or-self::text()').each do |node|
next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS)
next unless node.content =~ TAGS_PATTERN
diff --git a/lib/banzai/filter/image_lazy_load_filter.rb b/lib/banzai/filter/image_lazy_load_filter.rb
index d8b9eb29cf5..916c135b777 100644
--- a/lib/banzai/filter/image_lazy_load_filter.rb
+++ b/lib/banzai/filter/image_lazy_load_filter.rb
@@ -6,8 +6,11 @@ module Banzai
# HTML filter that moves the value of image `src` attributes to `data-src`
# so they can be lazy loaded.
class ImageLazyLoadFilter < HTML::Pipeline::Filter
+ CSS = 'img'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
def call
- doc.xpath('descendant-or-self::img').each do |img|
+ doc.xpath(XPATH).each do |img|
img.add_class('lazy')
img['data-src'] = img['src']
img['src'] = LazyImageTagHelper.placeholder_image
diff --git a/lib/banzai/filter/inline_diff_filter.rb b/lib/banzai/filter/inline_diff_filter.rb
index 5a1c0bee32d..e47ff15e7b7 100644
--- a/lib/banzai/filter/inline_diff_filter.rb
+++ b/lib/banzai/filter/inline_diff_filter.rb
@@ -7,7 +7,7 @@ module Banzai
IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set
def call
- doc.search(".//text()").each do |node|
+ doc.xpath('descendant-or-self::text()').each do |node|
next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS)
content = node.to_html
diff --git a/lib/banzai/filter/inline_metrics_redactor_filter.rb b/lib/banzai/filter/inline_metrics_redactor_filter.rb
index 2259115acfc..b256815ae84 100644
--- a/lib/banzai/filter/inline_metrics_redactor_filter.rb
+++ b/lib/banzai/filter/inline_metrics_redactor_filter.rb
@@ -8,6 +8,7 @@ module Banzai
include Gitlab::Utils::StrongMemoize
METRICS_CSS_CLASS = '.js-render-metrics'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(METRICS_CSS_CLASS).freeze
EMBED_LIMIT = 100
Route = Struct.new(:regex, :permission)
@@ -41,7 +42,7 @@ module Banzai
# @return [Nokogiri::XML::NodeSet]
def nodes
strong_memoize(:nodes) do
- nodes = doc.css(METRICS_CSS_CLASS)
+ nodes = doc.xpath(XPATH)
nodes.drop(EMBED_LIMIT).each(&:remove)
nodes
diff --git a/lib/banzai/filter/kroki_filter.rb b/lib/banzai/filter/kroki_filter.rb
index dbd4de32a47..3803302c324 100644
--- a/lib/banzai/filter/kroki_filter.rb
+++ b/lib/banzai/filter/kroki_filter.rb
@@ -15,10 +15,11 @@ module Banzai
.map { |diagram_type| %(pre[lang="#{diagram_type}"] > code) }
.join(', ')
- return doc unless doc.at(diagram_selectors)
+ xpath = Gitlab::Utils::Nokogiri.css_to_xpath(diagram_selectors)
+ return doc unless doc.at_xpath(xpath)
diagram_format = "svg"
- doc.css(diagram_selectors).each do |node|
+ doc.xpath(xpath).each do |node|
diagram_type = node.parent['lang']
img_tag = Nokogiri::HTML::DocumentFragment.parse(%(<img src="#{create_image_src(diagram_type, diagram_format, node.content)}"/>))
node.parent.replace(img_tag)
diff --git a/lib/banzai/filter/markdown_post_escape_filter.rb b/lib/banzai/filter/markdown_post_escape_filter.rb
index ad32e9afbf5..b69afdcfebe 100644
--- a/lib/banzai/filter/markdown_post_escape_filter.rb
+++ b/lib/banzai/filter/markdown_post_escape_filter.rb
@@ -8,6 +8,11 @@ module Banzai
NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze
SPAN_REGEX = %r{<span>(.*?)</span>}.freeze
+ CSS_A = 'a'
+ XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze
+ CSS_CODE = 'code'
+ XPATH_CODE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_CODE).freeze
+
def call
return doc unless result[:escaped_literals]
@@ -24,12 +29,12 @@ module Banzai
# Banzai::Renderer::CommonMark::HTML. However, we eventually want to use
# the built-in compiled renderer, rather than the ruby version, for speed.
# So let's do this work here.
- doc.css('a').each do |node|
+ doc.xpath(XPATH_A).each do |node|
node.attributes['href'].value = node.attributes['href'].value.gsub(SPAN_REGEX, '\1') if node.attributes['href']
node.attributes['title'].value = node.attributes['title'].value.gsub(SPAN_REGEX, '\1') if node.attributes['title']
end
- doc.css('code').each do |node|
+ doc.xpath(XPATH_CODE).each do |node|
node.attributes['lang'].value = node.attributes['lang'].value.gsub(SPAN_REGEX, '\1') if node.attributes['lang']
end
diff --git a/lib/banzai/filter/math_filter.rb b/lib/banzai/filter/math_filter.rb
index 2247984b86d..53dafe45fb3 100644
--- a/lib/banzai/filter/math_filter.rb
+++ b/lib/banzai/filter/math_filter.rb
@@ -10,6 +10,11 @@ module Banzai
# HTML filter that adds class="code math" and removes the dollar sign in $`2+2`$.
#
class MathFilter < HTML::Pipeline::Filter
+ CSS_MATH = 'pre.code.language-math'
+ XPATH_MATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_MATH).freeze
+ CSS_CODE = 'code'
+ XPATH_CODE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_CODE).freeze
+
# Attribute indicating inline or display math.
STYLE_ATTRIBUTE = 'data-math-style'
@@ -21,7 +26,7 @@ module Banzai
DOLLAR_SIGN = '$'
def call
- doc.css('code').each do |code|
+ doc.xpath(XPATH_CODE).each do |code|
closing = code.next
opening = code.previous
@@ -39,7 +44,7 @@ module Banzai
end
end
- doc.css('pre.code.language-math').each do |el|
+ doc.xpath(XPATH_MATH).each do |el|
el[STYLE_ATTRIBUTE] = 'display'
el[:class] += " #{TAG_CLASS}"
end
diff --git a/lib/banzai/filter/mermaid_filter.rb b/lib/banzai/filter/mermaid_filter.rb
index f0adb83af8a..aaaf851ccf0 100644
--- a/lib/banzai/filter/mermaid_filter.rb
+++ b/lib/banzai/filter/mermaid_filter.rb
@@ -4,8 +4,11 @@
module Banzai
module Filter
class MermaidFilter < HTML::Pipeline::Filter
+ CSS = 'pre[lang="mermaid"] > code'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
def call
- doc.css('pre[lang="mermaid"] > code').add_class('js-render-mermaid')
+ doc.xpath(XPATH).add_class('js-render-mermaid')
doc
end
diff --git a/lib/banzai/filter/plantuml_filter.rb b/lib/banzai/filter/plantuml_filter.rb
index 37d4126c1ba..93370178a61 100644
--- a/lib/banzai/filter/plantuml_filter.rb
+++ b/lib/banzai/filter/plantuml_filter.rb
@@ -8,12 +8,15 @@ module Banzai
# HTML that replaces all `code plantuml` tags with PlantUML img tags.
#
class PlantumlFilter < HTML::Pipeline::Filter
+ CSS = 'pre > code[lang="plantuml"]'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
def call
- return doc unless settings.plantuml_enabled? && doc.at('pre > code[lang="plantuml"]')
+ return doc unless settings.plantuml_enabled? && doc.at_xpath(XPATH)
plantuml_setup
- doc.css('pre > code[lang="plantuml"]').each do |node|
+ doc.xpath(XPATH).each do |node|
img_tag = Nokogiri::HTML::DocumentFragment.parse(
Asciidoctor::PlantUml::Processor.plantuml_content(node.content, {}))
node.parent.replace(img_tag)
diff --git a/lib/banzai/filter/suggestion_filter.rb b/lib/banzai/filter/suggestion_filter.rb
index 56a14ec0737..aa1fcb1021c 100644
--- a/lib/banzai/filter/suggestion_filter.rb
+++ b/lib/banzai/filter/suggestion_filter.rb
@@ -7,10 +7,13 @@ module Banzai
# Class used for tagging elements that should be rendered
TAG_CLASS = 'js-render-suggestion'
+ CSS = 'pre.language-suggestion > code'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
def call
return doc unless suggestions_filter_enabled?
- doc.search('pre.language-suggestion > code').each do |node|
+ doc.xpath(XPATH).each do |node|
node.add_class(TAG_CLASS)
end
diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb
index 731a2bb4c77..eceeb113555 100644
--- a/lib/banzai/filter/syntax_highlight_filter.rb
+++ b/lib/banzai/filter/syntax_highlight_filter.rb
@@ -14,8 +14,11 @@ module Banzai
PARAMS_DELIMITER = ':'
LANG_PARAMS_ATTR = 'data-lang-params'
+ CSS = 'pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
def call
- doc.search('pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code').each do |node|
+ doc.xpath(XPATH).each do |node|
highlight_node(node)
end
diff --git a/lib/banzai/filter/table_of_contents_filter.rb b/lib/banzai/filter/table_of_contents_filter.rb
index b362607aed2..13ca9cde567 100644
--- a/lib/banzai/filter/table_of_contents_filter.rb
+++ b/lib/banzai/filter/table_of_contents_filter.rb
@@ -19,6 +19,9 @@ module Banzai
class TableOfContentsFilter < HTML::Pipeline::Filter
include Gitlab::Utils::Markdown
+ CSS = 'h1, h2, h3, h4, h5, h6'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
def call
return doc if context[:no_header_anchors]
@@ -27,7 +30,7 @@ module Banzai
headers = Hash.new(0)
header_root = current_header = HeaderNode.new
- doc.css('h1, h2, h3, h4, h5, h6').each do |node|
+ doc.xpath(XPATH).each do |node|
if header_content = node.children.first
id = string_to_anchor(node.text)
diff --git a/lib/banzai/filter/truncate_source_filter.rb b/lib/banzai/filter/truncate_source_filter.rb
index 44f88b253d9..a21d4a44295 100644
--- a/lib/banzai/filter/truncate_source_filter.rb
+++ b/lib/banzai/filter/truncate_source_filter.rb
@@ -3,12 +3,29 @@
module Banzai
module Filter
class TruncateSourceFilter < HTML::Pipeline::TextFilter
+ CHARACTER_COUNT_LIMIT = 1.megabyte
+ USER_MSG_LIMIT = 10_000
+
def call
- return text unless context.key?(:limit)
+ # don't truncate if it's a :blob and no limit is set
+ return text if context[:text_source] == :blob && !context.key?(:limit)
+
+ limit = context[:limit] || CHARACTER_COUNT_LIMIT
+
+ # no sense in allowing `truncate_bytes` to duplicate a large
+ # string unless it's too big
+ return text if text.bytesize <= limit
# Use three dots instead of the ellipsis Unicode character because
# some clients show the raw Unicode value in the merge commit.
- text.truncate_bytes(context[:limit], omission: '...')
+ trunc = text.truncate_bytes(limit, omission: '...')
+
+ # allows us to indicate to the user that what they see is a truncated copy
+ if limit > USER_MSG_LIMIT
+ trunc.prepend("_The text is longer than #{limit} characters and has been visually truncated._\n\n")
+ end
+
+ trunc
end
end
end
diff --git a/lib/banzai/filter/wiki_link_filter.rb b/lib/banzai/filter/wiki_link_filter.rb
index 44f13612fde..2b95d87ff8e 100644
--- a/lib/banzai/filter/wiki_link_filter.rb
+++ b/lib/banzai/filter/wiki_link_filter.rb
@@ -10,14 +10,21 @@ module Banzai
class WikiLinkFilter < HTML::Pipeline::Filter
include Gitlab::Utils::SanitizeNodeLink
+ CSS_A = 'a:not(.gfm)'
+ XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze
+ CSS_VA = 'video, audio'
+ XPATH_VA = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_VA).freeze
+ CSS_IMG = 'img'
+ XPATH_IMG = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_IMG).freeze
+
def call
return doc unless wiki?
- doc.search('a:not(.gfm)').each { |el| process_link(el.attribute('href'), el) }
+ doc.xpath(XPATH_A).each { |el| process_link(el.attribute('href'), el) }
- doc.search('video, audio').each { |el| process_link(el.attribute('src'), el) }
+ doc.xpath(XPATH_VA).each { |el| process_link(el.attribute('src'), el) }
- doc.search('img').each do |el|
+ doc.xpath(XPATH_IMG).each do |el|
attr = el.attribute('data-src') || el.attribute('src')
process_link(attr, el)
diff --git a/lib/gitlab/diff/suggestions_parser.rb b/lib/gitlab/diff/suggestions_parser.rb
index f3e6fc455ac..6f126147113 100644
--- a/lib/gitlab/diff/suggestions_parser.rb
+++ b/lib/gitlab/diff/suggestions_parser.rb
@@ -6,6 +6,9 @@ module Gitlab
# Matches for instance "-1", "+1" or "-1+2".
SUGGESTION_CONTEXT = /^(\-(?<above>\d+))?(\+(?<below>\d+))?$/.freeze
+ CSS = 'pre.language-suggestion'
+ XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze
+
class << self
# Returns an array of Gitlab::Diff::Suggestion which represents each
# suggestion in the given text.
@@ -17,7 +20,7 @@ module Gitlab
no_original_data: true,
suggestions_filter_enabled: supports_suggestion)
doc = Nokogiri::HTML(html)
- suggestion_nodes = doc.search('pre.language-suggestion')
+ suggestion_nodes = doc.xpath(XPATH)
return [] if suggestion_nodes.empty?
diff --git a/lib/gitlab/utils/nokogiri.rb b/lib/gitlab/utils/nokogiri.rb
new file mode 100644
index 00000000000..4b37bb7e5ea
--- /dev/null
+++ b/lib/gitlab/utils/nokogiri.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Utils
+ class Nokogiri
+ class << self
+ # Use Nokogiri to convert a css selector into an xpath selector.
+ # Nokogiri can use css selectors with `doc.search()`. However
+ # for large node trees, it is _much_ slower than using xpath,
+ # by several orders of magnitude.
+ # https://gitlab.com/gitlab-org/gitlab/-/issues/329186
+ def css_to_xpath(css)
+ xpath = ::Nokogiri::CSS.xpath_for(css)
+
+ # Due to https://github.com/sparklemotion/nokogiri/issues/572,
+ # we remove the leading `//` and add `descendant-or-self::`
+ # in order to ensure we're searching from this node and all
+ # descendants.
+ xpath.map { |t| "descendant-or-self::#{t[2..-1]}" }.join('|')
+ end
+ end
+ end
+ end
+end
diff --git a/spec/helpers/markup_helper_spec.rb b/spec/helpers/markup_helper_spec.rb
index 00a59f037e0..e946857ac77 100644
--- a/spec/helpers/markup_helper_spec.rb
+++ b/spec/helpers/markup_helper_spec.rb
@@ -418,6 +418,13 @@ FooBar
describe '#markup' do
let(:content) { 'Noël' }
+ it 'sets the :text_source to :blob in the context' do
+ context = {}
+ helper.markup('foo.md', content, context)
+
+ expect(context).to include(text_source: :blob)
+ end
+
it 'preserves encoding' do
expect(content.encoding.name).to eq('UTF-8')
expect(helper.markup('foo.rst', content).encoding.name).to eq('UTF-8')
diff --git a/spec/lib/banzai/filter/truncate_source_filter_spec.rb b/spec/lib/banzai/filter/truncate_source_filter_spec.rb
index d5eb8b738b1..8970aa1d382 100644
--- a/spec/lib/banzai/filter/truncate_source_filter_spec.rb
+++ b/spec/lib/banzai/filter/truncate_source_filter_spec.rb
@@ -8,24 +8,68 @@ RSpec.describe Banzai::Filter::TruncateSourceFilter do
let(:short_text) { 'foo' * 10 }
let(:long_text) { ([short_text] * 10).join(' ') }
- it 'does nothing when limit is unspecified' do
- output = filter(long_text)
-
- expect(output).to eq(long_text)
+ before do
+ stub_const("#{described_class}::CHARACTER_COUNT_LIMIT", 50)
+ stub_const("#{described_class}::USER_MSG_LIMIT", 20)
end
- it 'does nothing to a short-enough text' do
- output = filter(short_text, limit: short_text.bytesize)
+ context 'when markdown belongs to a blob' do
+ it 'does nothing when limit is unspecified' do
+ output = filter(long_text, text_source: :blob)
+
+ expect(output).to eq(long_text)
+ end
+
+ it 'truncates normally when limit specified' do
+ truncated = 'foofoof...'
+
+ output = filter(long_text, text_source: :blob, limit: 10)
- expect(output).to eq(short_text)
+ expect(output).to eq(truncated)
+ end
end
- it 'truncates UTF-8 text by bytes, on a character boundary' do
- utf8_text = '日本語の文字が大きい'
- truncated = '日...'
+ context 'when markdown belongs to a field (non-blob)' do
+ it 'does nothing when limit is greater' do
+ output = filter(long_text, limit: 1.megabyte)
+
+ expect(output).to eq(long_text)
+ end
+
+ it 'truncates to the default when limit is unspecified' do
+ stub_const("#{described_class}::USER_MSG_LIMIT", 200)
+ truncated = 'foofoofoofoofoofoofoofoofoofoo foofoofoofoofoof...'
+
+ output = filter(long_text)
+
+ expect(output).to eq(truncated)
+ end
+
+ it 'prepends the user message' do
+ truncated = <<~TEXT
+ _The text is longer than 50 characters and has been visually truncated._
+
+ foofoofoofoofoofoofoofoofoofoo foofoofoofoofoof...
+ TEXT
+
+ output = filter(long_text)
+
+ expect(output).to eq(truncated.strip)
+ end
+
+ it 'does nothing to a short-enough text' do
+ output = filter(short_text, limit: short_text.bytesize)
+
+ expect(output).to eq(short_text)
+ end
+
+ it 'truncates UTF-8 text by bytes, on a character boundary' do
+ utf8_text = '日本語の文字が大きい'
+ truncated = '日...'
- expect(filter(utf8_text, limit: truncated.bytesize)).to eq(truncated)
- expect(filter(utf8_text, limit: utf8_text.bytesize)).to eq(utf8_text)
- expect(filter(utf8_text, limit: utf8_text.mb_chars.size)).not_to eq(utf8_text)
+ expect(filter(utf8_text, limit: truncated.bytesize)).to eq(truncated)
+ expect(filter(utf8_text, limit: utf8_text.bytesize)).to eq(utf8_text)
+ expect(filter(utf8_text, limit: utf8_text.mb_chars.size)).not_to eq(utf8_text)
+ end
end
end
diff --git a/spec/lib/gitlab/utils/nokogiri_spec.rb b/spec/lib/gitlab/utils/nokogiri_spec.rb
new file mode 100644
index 00000000000..90f137f53c8
--- /dev/null
+++ b/spec/lib/gitlab/utils/nokogiri_spec.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Utils::Nokogiri do
+ describe '#css_to_xpath' do
+ using RSpec::Parameterized::TableSyntax
+
+ where(:css, :xpath) do
+ 'img' | "descendant-or-self::img"
+ 'a.gfm' | "descendant-or-self::a[contains(concat(' ',normalize-space(@class),' '),' gfm ')]"
+ 'a:not(.gfm)' | "descendant-or-self::a[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]"
+ 'video, audio' | "descendant-or-self::video|descendant-or-self::audio"
+ '[data-math-style]' | "descendant-or-self::*[@data-math-style]"
+ '[data-mermaid-style]' | "descendant-or-self::*[@data-mermaid-style]"
+ '.js-render-metrics' | "descendant-or-self::*[contains(concat(' ',normalize-space(@class),' '),' js-render-metrics ')]"
+ 'h1, h2, h3, h4, h5, h6' | "descendant-or-self::h1|descendant-or-self::h2|descendant-or-self::h3|descendant-or-self::h4|descendant-or-self::h5|descendant-or-self::h6"
+ 'pre.code.language-math' | "descendant-or-self::pre[contains(concat(' ',normalize-space(@class),' '),' code ') and contains(concat(' ',normalize-space(@class),' '),' language-math ')]"
+ 'pre > code[lang="plantuml"]' | "descendant-or-self::pre/code[@lang=\"plantuml\"]"
+ 'pre[lang="mermaid"] > code' | "descendant-or-self::pre[@lang=\"mermaid\"]/code"
+ 'pre.language-suggestion' | "descendant-or-self::pre[contains(concat(' ',normalize-space(@class),' '),' language-suggestion ')]"
+ 'pre.language-suggestion > code' | "descendant-or-self::pre[contains(concat(' ',normalize-space(@class),' '),' language-suggestion ')]/code"
+ 'a.gfm[data-reference-type="user"]' | "descendant-or-self::a[contains(concat(' ',normalize-space(@class),' '),' gfm ') and @data-reference-type=\"user\"]"
+ 'a:not(.gfm), img:not(.gfm), video:not(.gfm), audio:not(.gfm)' | "descendant-or-self::a[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]|descendant-or-self::img[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]|descendant-or-self::video[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]|descendant-or-self::audio[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]"
+ 'pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code' | "descendant-or-self::pre[not(@data-math-style) and not(@data-mermaid-style) and not(@data-kroki-style)]/code"
+ end
+
+ with_them do
+ it 'generates the xpath' do
+ expect(described_class.css_to_xpath(css)).to eq xpath
+ end
+ end
+ end
+end