diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2021-05-31 11:40:45 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2021-05-31 11:41:14 +0000 |
commit | 279809e18f6949adb2543a45c0c800f549d35541 (patch) | |
tree | aac5347ff15bcdeadf7a00d7866cad76349c0fe4 | |
parent | fcc1904c6fa3d5d71ca7f78470cf4c19ea888e1c (diff) | |
download | gitlab-ce-279809e18f6949adb2543a45c0c800f549d35541.tar.gz |
Add latest changes from gitlab-org/security/gitlab@13-11-stable-ee
27 files changed, 235 insertions, 53 deletions
diff --git a/app/helpers/markup_helper.rb b/app/helpers/markup_helper.rb index ad206d0e5b5..6c19fcc9121 100644 --- a/app/helpers/markup_helper.rb +++ b/app/helpers/markup_helper.rb @@ -118,6 +118,7 @@ module MarkupHelper def markup(file_name, text, context = {}) context[:project] ||= @project + context[:text_source] ||= :blob html = context.delete(:rendered) || markup_unsafe(file_name, text, context) prepare_for_rendering(html, context) end diff --git a/lib/banzai/filter/absolute_link_filter.rb b/lib/banzai/filter/absolute_link_filter.rb index a9bdb004c4b..cc7bf3ed556 100644 --- a/lib/banzai/filter/absolute_link_filter.rb +++ b/lib/banzai/filter/absolute_link_filter.rb @@ -6,10 +6,13 @@ module Banzai module Filter # HTML filter that converts relative urls into absolute ones. class AbsoluteLinkFilter < HTML::Pipeline::Filter + CSS = 'a.gfm' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze + def call return doc unless context[:only_path] == false - doc.search('a.gfm').each do |el| + doc.xpath(XPATH).each do |el| process_link_attr el.attribute('href') end diff --git a/lib/banzai/filter/ascii_doc_post_processing_filter.rb b/lib/banzai/filter/ascii_doc_post_processing_filter.rb index 09f0fd7df45..83c729e13b5 100644 --- a/lib/banzai/filter/ascii_doc_post_processing_filter.rb +++ b/lib/banzai/filter/ascii_doc_post_processing_filter.rb @@ -3,14 +3,20 @@ module Banzai module Filter class AsciiDocPostProcessingFilter < HTML::Pipeline::Filter + CSS_MATH = '[data-math-style]' + XPATH_MATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_MATH).freeze + CSS_MERM = '[data-mermaid-style]' + XPATH_MERM = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_MERM).freeze + def call - doc.search('[data-math-style]').each do |node| + doc.xpath(XPATH_MATH).each do |node| node.set_attribute('class', 'code math js-render-math') end - doc.search('[data-mermaid-style]').each do |node| + doc.xpath(XPATH_MERM).each do |node| node.set_attribute('class', 'js-render-mermaid') end + doc end end diff --git a/lib/banzai/filter/base_relative_link_filter.rb b/lib/banzai/filter/base_relative_link_filter.rb index fd526df4c48..60d09b69a10 100644 --- a/lib/banzai/filter/base_relative_link_filter.rb +++ b/lib/banzai/filter/base_relative_link_filter.rb @@ -7,23 +7,20 @@ module Banzai class BaseRelativeLinkFilter < HTML::Pipeline::Filter include Gitlab::Utils::StrongMemoize + CSS = 'a:not(.gfm), img:not(.gfm), video:not(.gfm), audio:not(.gfm)' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze + protected def linkable_attributes strong_memoize(:linkable_attributes) do attrs = [] - attrs += doc.search('a:not(.gfm)').map do |el| - el.attribute('href') - end - - attrs += doc.search('img:not(.gfm), video:not(.gfm), audio:not(.gfm)').flat_map do |el| - [el.attribute('src'), el.attribute('data-src')] + attrs += doc.xpath(XPATH).flat_map do |el| + [el.attribute('href'), el.attribute('src'), el.attribute('data-src')] end - attrs.reject do |attr| - attr.blank? || attr.value.start_with?('//') - end + attrs.reject { |attr| attr.blank? || attr.value.start_with?('//') } end end diff --git a/lib/banzai/filter/color_filter.rb b/lib/banzai/filter/color_filter.rb index 0aca7441638..58e9b8cdba1 100644 --- a/lib/banzai/filter/color_filter.rb +++ b/lib/banzai/filter/color_filter.rb @@ -7,8 +7,11 @@ module Banzai class ColorFilter < HTML::Pipeline::Filter COLOR_CHIP_CLASS = 'gfm-color_chip' + CSS = 'code' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze + def call - doc.css('code').each do |node| + doc.xpath(XPATH).each do |node| color = ColorParser.parse(node.content) node << color_chip(color) if color end diff --git a/lib/banzai/filter/custom_emoji_filter.rb b/lib/banzai/filter/custom_emoji_filter.rb index 1ee8f4e31e8..e26c5d36f2b 100644 --- a/lib/banzai/filter/custom_emoji_filter.rb +++ b/lib/banzai/filter/custom_emoji_filter.rb @@ -9,7 +9,7 @@ module Banzai return doc unless context[:project] return doc unless Feature.enabled?(:custom_emoji, context[:project]) - doc.search(".//text()").each do |node| + doc.xpath('descendant-or-self::text()').each do |node| content = node.to_html next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS) diff --git a/lib/banzai/filter/emoji_filter.rb b/lib/banzai/filter/emoji_filter.rb index 8952a3ff6b4..9d24bf028b6 100644 --- a/lib/banzai/filter/emoji_filter.rb +++ b/lib/banzai/filter/emoji_filter.rb @@ -11,7 +11,7 @@ module Banzai IGNORE_UNICODE_EMOJIS = %w(™ © ®).freeze def call - doc.search(".//text()").each do |node| + doc.xpath('descendant-or-self::text()').each do |node| content = node.to_html next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS) diff --git a/lib/banzai/filter/footnote_filter.rb b/lib/banzai/filter/footnote_filter.rb index 5474242e03c..0f856dc0eb9 100644 --- a/lib/banzai/filter/footnote_filter.rb +++ b/lib/banzai/filter/footnote_filter.rb @@ -23,17 +23,23 @@ module Banzai FOOTNOTE_LINK_REFERENCE_PATTERN = /\A#{FOOTNOTE_LINK_ID_PREFIX}\d+\z/.freeze FOOTNOTE_START_NUMBER = 1 + CSS_SECTION = "ol > li[id=#{FOOTNOTE_ID_PREFIX}#{FOOTNOTE_START_NUMBER}]" + XPATH_SECTION = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_SECTION).freeze + CSS_FOOTNOTE = 'sup > a[id]' + XPATH_FOOTNOTE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_FOOTNOTE).freeze + def call - return doc unless first_footnote = doc.at_css("ol > li[id=#{fn_id(FOOTNOTE_START_NUMBER)}]") + return doc unless first_footnote = doc.at_xpath(XPATH_SECTION) # Sanitization stripped off the section wrapper - add it back in first_footnote.parent.wrap('<section class="footnotes">') rand_suffix = "-#{random_number}" modified_footnotes = {} - doc.css('sup > a[id]').each do |link_node| + doc.xpath(XPATH_FOOTNOTE).each do |link_node| ref_num = link_node[:id].delete_prefix(FOOTNOTE_LINK_ID_PREFIX) - footnote_node = doc.at_css("li[id=#{fn_id(ref_num)}]") + node_xpath = Gitlab::Utils::Nokogiri.css_to_xpath("li[id=#{fn_id(ref_num)}]") + footnote_node = doc.at_xpath(node_xpath) if INTEGER_PATTERN.match?(ref_num) && (footnote_node || modified_footnotes[ref_num]) link_node[:href] += rand_suffix diff --git a/lib/banzai/filter/gollum_tags_filter.rb b/lib/banzai/filter/gollum_tags_filter.rb index 6de9f2b86f6..0548d5a9997 100644 --- a/lib/banzai/filter/gollum_tags_filter.rb +++ b/lib/banzai/filter/gollum_tags_filter.rb @@ -60,7 +60,7 @@ module Banzai IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set def call - doc.search(".//text()").each do |node| + doc.xpath('descendant-or-self::text()').each do |node| next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS) next unless node.content =~ TAGS_PATTERN diff --git a/lib/banzai/filter/image_lazy_load_filter.rb b/lib/banzai/filter/image_lazy_load_filter.rb index d8b9eb29cf5..916c135b777 100644 --- a/lib/banzai/filter/image_lazy_load_filter.rb +++ b/lib/banzai/filter/image_lazy_load_filter.rb @@ -6,8 +6,11 @@ module Banzai # HTML filter that moves the value of image `src` attributes to `data-src` # so they can be lazy loaded. class ImageLazyLoadFilter < HTML::Pipeline::Filter + CSS = 'img' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze + def call - doc.xpath('descendant-or-self::img').each do |img| + doc.xpath(XPATH).each do |img| img.add_class('lazy') img['data-src'] = img['src'] img['src'] = LazyImageTagHelper.placeholder_image diff --git a/lib/banzai/filter/inline_diff_filter.rb b/lib/banzai/filter/inline_diff_filter.rb index 5a1c0bee32d..e47ff15e7b7 100644 --- a/lib/banzai/filter/inline_diff_filter.rb +++ b/lib/banzai/filter/inline_diff_filter.rb @@ -7,7 +7,7 @@ module Banzai IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set def call - doc.search(".//text()").each do |node| + doc.xpath('descendant-or-self::text()').each do |node| next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS) content = node.to_html diff --git a/lib/banzai/filter/inline_metrics_redactor_filter.rb b/lib/banzai/filter/inline_metrics_redactor_filter.rb index 2259115acfc..b256815ae84 100644 --- a/lib/banzai/filter/inline_metrics_redactor_filter.rb +++ b/lib/banzai/filter/inline_metrics_redactor_filter.rb @@ -8,6 +8,7 @@ module Banzai include Gitlab::Utils::StrongMemoize METRICS_CSS_CLASS = '.js-render-metrics' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(METRICS_CSS_CLASS).freeze EMBED_LIMIT = 100 Route = Struct.new(:regex, :permission) @@ -41,7 +42,7 @@ module Banzai # @return [Nokogiri::XML::NodeSet] def nodes strong_memoize(:nodes) do - nodes = doc.css(METRICS_CSS_CLASS) + nodes = doc.xpath(XPATH) nodes.drop(EMBED_LIMIT).each(&:remove) nodes diff --git a/lib/banzai/filter/kroki_filter.rb b/lib/banzai/filter/kroki_filter.rb index dbd4de32a47..3803302c324 100644 --- a/lib/banzai/filter/kroki_filter.rb +++ b/lib/banzai/filter/kroki_filter.rb @@ -15,10 +15,11 @@ module Banzai .map { |diagram_type| %(pre[lang="#{diagram_type}"] > code) } .join(', ') - return doc unless doc.at(diagram_selectors) + xpath = Gitlab::Utils::Nokogiri.css_to_xpath(diagram_selectors) + return doc unless doc.at_xpath(xpath) diagram_format = "svg" - doc.css(diagram_selectors).each do |node| + doc.xpath(xpath).each do |node| diagram_type = node.parent['lang'] img_tag = Nokogiri::HTML::DocumentFragment.parse(%(<img src="#{create_image_src(diagram_type, diagram_format, node.content)}"/>)) node.parent.replace(img_tag) diff --git a/lib/banzai/filter/markdown_post_escape_filter.rb b/lib/banzai/filter/markdown_post_escape_filter.rb index ad32e9afbf5..b69afdcfebe 100644 --- a/lib/banzai/filter/markdown_post_escape_filter.rb +++ b/lib/banzai/filter/markdown_post_escape_filter.rb @@ -8,6 +8,11 @@ module Banzai NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze SPAN_REGEX = %r{<span>(.*?)</span>}.freeze + CSS_A = 'a' + XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze + CSS_CODE = 'code' + XPATH_CODE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_CODE).freeze + def call return doc unless result[:escaped_literals] @@ -24,12 +29,12 @@ module Banzai # Banzai::Renderer::CommonMark::HTML. However, we eventually want to use # the built-in compiled renderer, rather than the ruby version, for speed. # So let's do this work here. - doc.css('a').each do |node| + doc.xpath(XPATH_A).each do |node| node.attributes['href'].value = node.attributes['href'].value.gsub(SPAN_REGEX, '\1') if node.attributes['href'] node.attributes['title'].value = node.attributes['title'].value.gsub(SPAN_REGEX, '\1') if node.attributes['title'] end - doc.css('code').each do |node| + doc.xpath(XPATH_CODE).each do |node| node.attributes['lang'].value = node.attributes['lang'].value.gsub(SPAN_REGEX, '\1') if node.attributes['lang'] end diff --git a/lib/banzai/filter/math_filter.rb b/lib/banzai/filter/math_filter.rb index 2247984b86d..53dafe45fb3 100644 --- a/lib/banzai/filter/math_filter.rb +++ b/lib/banzai/filter/math_filter.rb @@ -10,6 +10,11 @@ module Banzai # HTML filter that adds class="code math" and removes the dollar sign in $`2+2`$. # class MathFilter < HTML::Pipeline::Filter + CSS_MATH = 'pre.code.language-math' + XPATH_MATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_MATH).freeze + CSS_CODE = 'code' + XPATH_CODE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_CODE).freeze + # Attribute indicating inline or display math. STYLE_ATTRIBUTE = 'data-math-style' @@ -21,7 +26,7 @@ module Banzai DOLLAR_SIGN = '$' def call - doc.css('code').each do |code| + doc.xpath(XPATH_CODE).each do |code| closing = code.next opening = code.previous @@ -39,7 +44,7 @@ module Banzai end end - doc.css('pre.code.language-math').each do |el| + doc.xpath(XPATH_MATH).each do |el| el[STYLE_ATTRIBUTE] = 'display' el[:class] += " #{TAG_CLASS}" end diff --git a/lib/banzai/filter/mermaid_filter.rb b/lib/banzai/filter/mermaid_filter.rb index f0adb83af8a..aaaf851ccf0 100644 --- a/lib/banzai/filter/mermaid_filter.rb +++ b/lib/banzai/filter/mermaid_filter.rb @@ -4,8 +4,11 @@ module Banzai module Filter class MermaidFilter < HTML::Pipeline::Filter + CSS = 'pre[lang="mermaid"] > code' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze + def call - doc.css('pre[lang="mermaid"] > code').add_class('js-render-mermaid') + doc.xpath(XPATH).add_class('js-render-mermaid') doc end diff --git a/lib/banzai/filter/plantuml_filter.rb b/lib/banzai/filter/plantuml_filter.rb index 37d4126c1ba..93370178a61 100644 --- a/lib/banzai/filter/plantuml_filter.rb +++ b/lib/banzai/filter/plantuml_filter.rb @@ -8,12 +8,15 @@ module Banzai # HTML that replaces all `code plantuml` tags with PlantUML img tags. # class PlantumlFilter < HTML::Pipeline::Filter + CSS = 'pre > code[lang="plantuml"]' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze + def call - return doc unless settings.plantuml_enabled? && doc.at('pre > code[lang="plantuml"]') + return doc unless settings.plantuml_enabled? && doc.at_xpath(XPATH) plantuml_setup - doc.css('pre > code[lang="plantuml"]').each do |node| + doc.xpath(XPATH).each do |node| img_tag = Nokogiri::HTML::DocumentFragment.parse( Asciidoctor::PlantUml::Processor.plantuml_content(node.content, {})) node.parent.replace(img_tag) diff --git a/lib/banzai/filter/suggestion_filter.rb b/lib/banzai/filter/suggestion_filter.rb index 56a14ec0737..aa1fcb1021c 100644 --- a/lib/banzai/filter/suggestion_filter.rb +++ b/lib/banzai/filter/suggestion_filter.rb @@ -7,10 +7,13 @@ module Banzai # Class used for tagging elements that should be rendered TAG_CLASS = 'js-render-suggestion' + CSS = 'pre.language-suggestion > code' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze + def call return doc unless suggestions_filter_enabled? - doc.search('pre.language-suggestion > code').each do |node| + doc.xpath(XPATH).each do |node| node.add_class(TAG_CLASS) end diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb index 731a2bb4c77..eceeb113555 100644 --- a/lib/banzai/filter/syntax_highlight_filter.rb +++ b/lib/banzai/filter/syntax_highlight_filter.rb @@ -14,8 +14,11 @@ module Banzai PARAMS_DELIMITER = ':' LANG_PARAMS_ATTR = 'data-lang-params' + CSS = 'pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze + def call - doc.search('pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code').each do |node| + doc.xpath(XPATH).each do |node| highlight_node(node) end diff --git a/lib/banzai/filter/table_of_contents_filter.rb b/lib/banzai/filter/table_of_contents_filter.rb index b362607aed2..13ca9cde567 100644 --- a/lib/banzai/filter/table_of_contents_filter.rb +++ b/lib/banzai/filter/table_of_contents_filter.rb @@ -19,6 +19,9 @@ module Banzai class TableOfContentsFilter < HTML::Pipeline::Filter include Gitlab::Utils::Markdown + CSS = 'h1, h2, h3, h4, h5, h6' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze + def call return doc if context[:no_header_anchors] @@ -27,7 +30,7 @@ module Banzai headers = Hash.new(0) header_root = current_header = HeaderNode.new - doc.css('h1, h2, h3, h4, h5, h6').each do |node| + doc.xpath(XPATH).each do |node| if header_content = node.children.first id = string_to_anchor(node.text) diff --git a/lib/banzai/filter/truncate_source_filter.rb b/lib/banzai/filter/truncate_source_filter.rb index 44f88b253d9..a21d4a44295 100644 --- a/lib/banzai/filter/truncate_source_filter.rb +++ b/lib/banzai/filter/truncate_source_filter.rb @@ -3,12 +3,29 @@ module Banzai module Filter class TruncateSourceFilter < HTML::Pipeline::TextFilter + CHARACTER_COUNT_LIMIT = 1.megabyte + USER_MSG_LIMIT = 10_000 + def call - return text unless context.key?(:limit) + # don't truncate if it's a :blob and no limit is set + return text if context[:text_source] == :blob && !context.key?(:limit) + + limit = context[:limit] || CHARACTER_COUNT_LIMIT + + # no sense in allowing `truncate_bytes` to duplicate a large + # string unless it's too big + return text if text.bytesize <= limit # Use three dots instead of the ellipsis Unicode character because # some clients show the raw Unicode value in the merge commit. - text.truncate_bytes(context[:limit], omission: '...') + trunc = text.truncate_bytes(limit, omission: '...') + + # allows us to indicate to the user that what they see is a truncated copy + if limit > USER_MSG_LIMIT + trunc.prepend("_The text is longer than #{limit} characters and has been visually truncated._\n\n") + end + + trunc end end end diff --git a/lib/banzai/filter/wiki_link_filter.rb b/lib/banzai/filter/wiki_link_filter.rb index 44f13612fde..2b95d87ff8e 100644 --- a/lib/banzai/filter/wiki_link_filter.rb +++ b/lib/banzai/filter/wiki_link_filter.rb @@ -10,14 +10,21 @@ module Banzai class WikiLinkFilter < HTML::Pipeline::Filter include Gitlab::Utils::SanitizeNodeLink + CSS_A = 'a:not(.gfm)' + XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze + CSS_VA = 'video, audio' + XPATH_VA = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_VA).freeze + CSS_IMG = 'img' + XPATH_IMG = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_IMG).freeze + def call return doc unless wiki? - doc.search('a:not(.gfm)').each { |el| process_link(el.attribute('href'), el) } + doc.xpath(XPATH_A).each { |el| process_link(el.attribute('href'), el) } - doc.search('video, audio').each { |el| process_link(el.attribute('src'), el) } + doc.xpath(XPATH_VA).each { |el| process_link(el.attribute('src'), el) } - doc.search('img').each do |el| + doc.xpath(XPATH_IMG).each do |el| attr = el.attribute('data-src') || el.attribute('src') process_link(attr, el) diff --git a/lib/gitlab/diff/suggestions_parser.rb b/lib/gitlab/diff/suggestions_parser.rb index f3e6fc455ac..6f126147113 100644 --- a/lib/gitlab/diff/suggestions_parser.rb +++ b/lib/gitlab/diff/suggestions_parser.rb @@ -6,6 +6,9 @@ module Gitlab # Matches for instance "-1", "+1" or "-1+2". SUGGESTION_CONTEXT = /^(\-(?<above>\d+))?(\+(?<below>\d+))?$/.freeze + CSS = 'pre.language-suggestion' + XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze + class << self # Returns an array of Gitlab::Diff::Suggestion which represents each # suggestion in the given text. @@ -17,7 +20,7 @@ module Gitlab no_original_data: true, suggestions_filter_enabled: supports_suggestion) doc = Nokogiri::HTML(html) - suggestion_nodes = doc.search('pre.language-suggestion') + suggestion_nodes = doc.xpath(XPATH) return [] if suggestion_nodes.empty? diff --git a/lib/gitlab/utils/nokogiri.rb b/lib/gitlab/utils/nokogiri.rb new file mode 100644 index 00000000000..4b37bb7e5ea --- /dev/null +++ b/lib/gitlab/utils/nokogiri.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Gitlab + module Utils + class Nokogiri + class << self + # Use Nokogiri to convert a css selector into an xpath selector. + # Nokogiri can use css selectors with `doc.search()`. However + # for large node trees, it is _much_ slower than using xpath, + # by several orders of magnitude. + # https://gitlab.com/gitlab-org/gitlab/-/issues/329186 + def css_to_xpath(css) + xpath = ::Nokogiri::CSS.xpath_for(css) + + # Due to https://github.com/sparklemotion/nokogiri/issues/572, + # we remove the leading `//` and add `descendant-or-self::` + # in order to ensure we're searching from this node and all + # descendants. + xpath.map { |t| "descendant-or-self::#{t[2..-1]}" }.join('|') + end + end + end + end +end diff --git a/spec/helpers/markup_helper_spec.rb b/spec/helpers/markup_helper_spec.rb index 00a59f037e0..e946857ac77 100644 --- a/spec/helpers/markup_helper_spec.rb +++ b/spec/helpers/markup_helper_spec.rb @@ -418,6 +418,13 @@ FooBar describe '#markup' do let(:content) { 'Noël' } + it 'sets the :text_source to :blob in the context' do + context = {} + helper.markup('foo.md', content, context) + + expect(context).to include(text_source: :blob) + end + it 'preserves encoding' do expect(content.encoding.name).to eq('UTF-8') expect(helper.markup('foo.rst', content).encoding.name).to eq('UTF-8') diff --git a/spec/lib/banzai/filter/truncate_source_filter_spec.rb b/spec/lib/banzai/filter/truncate_source_filter_spec.rb index d5eb8b738b1..8970aa1d382 100644 --- a/spec/lib/banzai/filter/truncate_source_filter_spec.rb +++ b/spec/lib/banzai/filter/truncate_source_filter_spec.rb @@ -8,24 +8,68 @@ RSpec.describe Banzai::Filter::TruncateSourceFilter do let(:short_text) { 'foo' * 10 } let(:long_text) { ([short_text] * 10).join(' ') } - it 'does nothing when limit is unspecified' do - output = filter(long_text) - - expect(output).to eq(long_text) + before do + stub_const("#{described_class}::CHARACTER_COUNT_LIMIT", 50) + stub_const("#{described_class}::USER_MSG_LIMIT", 20) end - it 'does nothing to a short-enough text' do - output = filter(short_text, limit: short_text.bytesize) + context 'when markdown belongs to a blob' do + it 'does nothing when limit is unspecified' do + output = filter(long_text, text_source: :blob) + + expect(output).to eq(long_text) + end + + it 'truncates normally when limit specified' do + truncated = 'foofoof...' + + output = filter(long_text, text_source: :blob, limit: 10) - expect(output).to eq(short_text) + expect(output).to eq(truncated) + end end - it 'truncates UTF-8 text by bytes, on a character boundary' do - utf8_text = '日本語の文字が大きい' - truncated = '日...' + context 'when markdown belongs to a field (non-blob)' do + it 'does nothing when limit is greater' do + output = filter(long_text, limit: 1.megabyte) + + expect(output).to eq(long_text) + end + + it 'truncates to the default when limit is unspecified' do + stub_const("#{described_class}::USER_MSG_LIMIT", 200) + truncated = 'foofoofoofoofoofoofoofoofoofoo foofoofoofoofoof...' + + output = filter(long_text) + + expect(output).to eq(truncated) + end + + it 'prepends the user message' do + truncated = <<~TEXT + _The text is longer than 50 characters and has been visually truncated._ + + foofoofoofoofoofoofoofoofoofoo foofoofoofoofoof... + TEXT + + output = filter(long_text) + + expect(output).to eq(truncated.strip) + end + + it 'does nothing to a short-enough text' do + output = filter(short_text, limit: short_text.bytesize) + + expect(output).to eq(short_text) + end + + it 'truncates UTF-8 text by bytes, on a character boundary' do + utf8_text = '日本語の文字が大きい' + truncated = '日...' - expect(filter(utf8_text, limit: truncated.bytesize)).to eq(truncated) - expect(filter(utf8_text, limit: utf8_text.bytesize)).to eq(utf8_text) - expect(filter(utf8_text, limit: utf8_text.mb_chars.size)).not_to eq(utf8_text) + expect(filter(utf8_text, limit: truncated.bytesize)).to eq(truncated) + expect(filter(utf8_text, limit: utf8_text.bytesize)).to eq(utf8_text) + expect(filter(utf8_text, limit: utf8_text.mb_chars.size)).not_to eq(utf8_text) + end end end diff --git a/spec/lib/gitlab/utils/nokogiri_spec.rb b/spec/lib/gitlab/utils/nokogiri_spec.rb new file mode 100644 index 00000000000..90f137f53c8 --- /dev/null +++ b/spec/lib/gitlab/utils/nokogiri_spec.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Utils::Nokogiri do + describe '#css_to_xpath' do + using RSpec::Parameterized::TableSyntax + + where(:css, :xpath) do + 'img' | "descendant-or-self::img" + 'a.gfm' | "descendant-or-self::a[contains(concat(' ',normalize-space(@class),' '),' gfm ')]" + 'a:not(.gfm)' | "descendant-or-self::a[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]" + 'video, audio' | "descendant-or-self::video|descendant-or-self::audio" + '[data-math-style]' | "descendant-or-self::*[@data-math-style]" + '[data-mermaid-style]' | "descendant-or-self::*[@data-mermaid-style]" + '.js-render-metrics' | "descendant-or-self::*[contains(concat(' ',normalize-space(@class),' '),' js-render-metrics ')]" + 'h1, h2, h3, h4, h5, h6' | "descendant-or-self::h1|descendant-or-self::h2|descendant-or-self::h3|descendant-or-self::h4|descendant-or-self::h5|descendant-or-self::h6" + 'pre.code.language-math' | "descendant-or-self::pre[contains(concat(' ',normalize-space(@class),' '),' code ') and contains(concat(' ',normalize-space(@class),' '),' language-math ')]" + 'pre > code[lang="plantuml"]' | "descendant-or-self::pre/code[@lang=\"plantuml\"]" + 'pre[lang="mermaid"] > code' | "descendant-or-self::pre[@lang=\"mermaid\"]/code" + 'pre.language-suggestion' | "descendant-or-self::pre[contains(concat(' ',normalize-space(@class),' '),' language-suggestion ')]" + 'pre.language-suggestion > code' | "descendant-or-self::pre[contains(concat(' ',normalize-space(@class),' '),' language-suggestion ')]/code" + 'a.gfm[data-reference-type="user"]' | "descendant-or-self::a[contains(concat(' ',normalize-space(@class),' '),' gfm ') and @data-reference-type=\"user\"]" + 'a:not(.gfm), img:not(.gfm), video:not(.gfm), audio:not(.gfm)' | "descendant-or-self::a[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]|descendant-or-self::img[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]|descendant-or-self::video[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]|descendant-or-self::audio[not(contains(concat(' ',normalize-space(@class),' '),' gfm '))]" + 'pre:not([data-math-style]):not([data-mermaid-style]):not([data-kroki-style]) > code' | "descendant-or-self::pre[not(@data-math-style) and not(@data-mermaid-style) and not(@data-kroki-style)]/code" + end + + with_them do + it 'generates the xpath' do + expect(described_class.css_to_xpath(css)).to eq xpath + end + end + end +end |