diff options
Diffstat (limited to 'lib/banzai/filter')
-rw-r--r-- | lib/banzai/filter/abstract_reference_filter.rb | 18 | ||||
-rw-r--r-- | lib/banzai/filter/ascii_doc_sanitization_filter.rb | 111 | ||||
-rw-r--r-- | lib/banzai/filter/autolink_filter.rb | 11 | ||||
-rw-r--r-- | lib/banzai/filter/base_sanitization_filter.rb | 68 | ||||
-rw-r--r-- | lib/banzai/filter/commit_reference_filter.rb | 17 | ||||
-rw-r--r-- | lib/banzai/filter/inline_embeds_filter.rb | 2 | ||||
-rw-r--r-- | lib/banzai/filter/inline_metrics_filter.rb | 33 | ||||
-rw-r--r-- | lib/banzai/filter/inline_metrics_redactor_filter.rb | 2 | ||||
-rw-r--r-- | lib/banzai/filter/issuable_reference_filter.rb | 18 | ||||
-rw-r--r-- | lib/banzai/filter/reference_redactor_filter.rb (renamed from lib/banzai/filter/redactor_filter.rb) | 4 | ||||
-rw-r--r-- | lib/banzai/filter/sanitization_filter.rb | 82 | ||||
-rw-r--r-- | lib/banzai/filter/wiki_link_filter.rb | 15 | ||||
-rw-r--r-- | lib/banzai/filter/wiki_link_filter/rewriter.rb | 8 |
13 files changed, 253 insertions, 136 deletions
diff --git a/lib/banzai/filter/abstract_reference_filter.rb b/lib/banzai/filter/abstract_reference_filter.rb index 0224dd8fcd1..52af28ce8ec 100644 --- a/lib/banzai/filter/abstract_reference_filter.rb +++ b/lib/banzai/filter/abstract_reference_filter.rb @@ -337,6 +337,24 @@ module Banzai @current_project_namespace_path ||= project&.namespace&.full_path end + def records_per_parent + @_records_per_project ||= {} + + @_records_per_project[object_class.to_s.underscore] ||= begin + hash = Hash.new { |h, k| h[k] = {} } + + parent_per_reference.each do |path, parent| + record_ids = references_per_parent[path] + + parent_records(parent, record_ids).each do |record| + hash[parent][record_identifier(record)] = record + end + end + + hash + end + end + private def full_project_path(namespace, project_ref) diff --git a/lib/banzai/filter/ascii_doc_sanitization_filter.rb b/lib/banzai/filter/ascii_doc_sanitization_filter.rb new file mode 100644 index 00000000000..9105e86ad04 --- /dev/null +++ b/lib/banzai/filter/ascii_doc_sanitization_filter.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +module Banzai + module Filter + # Sanitize HTML produced by AsciiDoc/Asciidoctor. + # + # Extends Banzai::Filter::BaseSanitizationFilter with specific rules. + class AsciiDocSanitizationFilter < Banzai::Filter::BaseSanitizationFilter + # Section anchor link pattern + SECTION_LINK_REF_PATTERN = /\A#{Gitlab::Asciidoc::DEFAULT_ADOC_ATTRS['idprefix']}(:?[[:alnum:]]|-|_)+\z/.freeze + SECTION_HEADINGS = %w(h2 h3 h4 h5 h6).freeze + + # Footnote link patterns + FOOTNOTE_LINK_ID_PATTERNS = { + a: /\A_footnoteref_\d+\z/, + div: /\A_footnotedef_\d+\z/ + }.freeze + + # Classes used by Asciidoctor to style components + ADMONITION_CLASSES = %w(fa icon-note icon-tip icon-warning icon-caution icon-important).freeze + CALLOUT_CLASSES = ['conum'].freeze + CHECKLIST_CLASSES = %w(fa fa-check-square-o fa-square-o).freeze + LIST_CLASSES = %w(checklist none no-bullet unnumbered unstyled).freeze + + ELEMENT_CLASSES_WHITELIST = { + span: %w(big small underline overline line-through).freeze, + div: ['admonitionblock'].freeze, + td: ['icon'].freeze, + i: ADMONITION_CLASSES + CALLOUT_CLASSES + CHECKLIST_CLASSES, + ul: LIST_CLASSES, + ol: LIST_CLASSES, + a: ['anchor'].freeze + }.freeze + + def customize_whitelist(whitelist) + # Allow marks + whitelist[:elements].push('mark') + + # Allow any classes in `span`, `i`, `div`, `td`, `ul`, `ol` and `a` elements + # but then remove any unknown classes + whitelist[:attributes]['span'] = %w(class) + whitelist[:attributes]['div'].push('class') + whitelist[:attributes]['td'] = %w(class) + whitelist[:attributes]['i'] = %w(class) + whitelist[:attributes]['ul'] = %w(class) + whitelist[:attributes]['ol'] = %w(class) + whitelist[:attributes]['a'].push('class') + whitelist[:transformers].push(self.class.remove_element_classes) + + # Allow `id` in heading elements for section anchors + SECTION_HEADINGS.each do |header| + whitelist[:attributes][header] = %w(id) + end + whitelist[:transformers].push(self.class.remove_non_heading_ids) + + # Allow `id` in footnote elements + FOOTNOTE_LINK_ID_PATTERNS.keys.each do |element| + whitelist[:attributes][element.to_s].push('id') + end + whitelist[:transformers].push(self.class.remove_non_footnote_ids) + + whitelist + end + + class << self + def remove_non_footnote_ids + lambda do |env| + node = env[:node] + + return unless (pattern = FOOTNOTE_LINK_ID_PATTERNS[node.name.to_sym]) + return unless node.has_attribute?('id') + + return if node['id'] =~ pattern + + node.remove_attribute('id') + end + end + + def remove_non_heading_ids + lambda do |env| + node = env[:node] + + return unless SECTION_HEADINGS.any?(node.name) + return unless node.has_attribute?('id') + + return if node['id'] =~ SECTION_LINK_REF_PATTERN + + node.remove_attribute('id') + end + end + + def remove_element_classes + lambda do |env| + node = env[:node] + + return unless (classes_whitelist = ELEMENT_CLASSES_WHITELIST[node.name.to_sym]) + return unless node.has_attribute?('class') + + classes = node['class'].strip.split(' ') + allowed_classes = (classes & classes_whitelist) + if allowed_classes.empty? + node.remove_attribute('class') + else + node['class'] = allowed_classes.join(' ') + end + end + end + end + end + end +end diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index 56214043d87..5f2cbc24c60 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -18,6 +18,7 @@ module Banzai # class AutolinkFilter < HTML::Pipeline::Filter include ActionView::Helpers::TagHelper + include Gitlab::Utils::SanitizeNodeLink # Pattern to match text that should be autolinked. # @@ -72,19 +73,11 @@ module Banzai private - # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme - def contains_unsafe?(scheme) - return false unless scheme - - scheme = scheme.strip.downcase - Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) } - end - def autolink_match(match) # start by stripping out dangerous links begin uri = Addressable::URI.parse(match) - return match if contains_unsafe?(uri.scheme) + return match unless safe_protocol?(uri.scheme) rescue Addressable::URI::InvalidURIError return match end diff --git a/lib/banzai/filter/base_sanitization_filter.rb b/lib/banzai/filter/base_sanitization_filter.rb new file mode 100644 index 00000000000..2dabca3552d --- /dev/null +++ b/lib/banzai/filter/base_sanitization_filter.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +module Banzai + module Filter + # Sanitize HTML produced by markup languages (Markdown, AsciiDoc...). + # Specific rules are implemented in dedicated filters: + # + # - Banzai::Filter::SanitizationFilter (Markdown) + # - Banzai::Filter::AsciiDocSanitizationFilter (AsciiDoc/Asciidoctor) + # + # Extends HTML::Pipeline::SanitizationFilter with common rules. + class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter + include Gitlab::Utils::StrongMemoize + extend Gitlab::Utils::SanitizeNodeLink + + UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze + + def whitelist + strong_memoize(:whitelist) do + whitelist = super.deep_dup + + # Allow span elements + whitelist[:elements].push('span') + + # Allow data-math-style attribute in order to support LaTeX formatting + whitelist[:attributes]['code'] = %w(data-math-style) + whitelist[:attributes]['pre'] = %w(data-math-style) + + # Allow html5 details/summary elements + whitelist[:elements].push('details') + whitelist[:elements].push('summary') + + # Allow abbr elements with title attribute + whitelist[:elements].push('abbr') + whitelist[:attributes]['abbr'] = %w(title) + + # Disallow `name` attribute globally, allow on `a` + whitelist[:attributes][:all].delete('name') + whitelist[:attributes]['a'].push('name') + + # Allow any protocol in `a` elements + # and then remove links with unsafe protocols + whitelist[:protocols].delete('a') + whitelist[:transformers].push(self.class.method(:remove_unsafe_links)) + + # Remove `rel` attribute from `a` elements + whitelist[:transformers].push(self.class.remove_rel) + + customize_whitelist(whitelist) + end + end + + def customize_whitelist(whitelist) + raise NotImplementedError + end + + class << self + def remove_rel + lambda do |env| + if env[:node_name] == 'a' + env[:node].remove_attribute('rel') + end + end + end + end + end + end +end diff --git a/lib/banzai/filter/commit_reference_filter.rb b/lib/banzai/filter/commit_reference_filter.rb index c3e5ac41cb8..e1d7b36b9a2 100644 --- a/lib/banzai/filter/commit_reference_filter.rb +++ b/lib/banzai/filter/commit_reference_filter.rb @@ -19,12 +19,11 @@ module Banzai end def find_object(project, id) - return unless project.is_a?(Project) + return unless project.is_a?(Project) && project.valid_repo? - if project && project.valid_repo? - # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/43894 - Gitlab::GitalyClient.allow_n_plus_1_calls { project.commit(id) } - end + _, record = records_per_parent[project].detect { |k, _v| Gitlab::Git.shas_eql?(k, id) } + + record end def referenced_merge_request_commit_shas @@ -66,6 +65,14 @@ module Banzai private + def record_identifier(record) + record.id + end + + def parent_records(parent, ids) + parent.commits_by(oids: ids.to_a) + end + def noteable context[:noteable] end diff --git a/lib/banzai/filter/inline_embeds_filter.rb b/lib/banzai/filter/inline_embeds_filter.rb index 97394fd8f82..9f1ef0796f0 100644 --- a/lib/banzai/filter/inline_embeds_filter.rb +++ b/lib/banzai/filter/inline_embeds_filter.rb @@ -10,8 +10,6 @@ module Banzai # the link, and insert this node after any html content # surrounding the link. def call - return doc unless Feature.enabled?(:gfm_embedded_metrics, context[:project]) - doc.xpath(xpath_search).each do |node| next unless element = element_to_embed(node) diff --git a/lib/banzai/filter/inline_metrics_filter.rb b/lib/banzai/filter/inline_metrics_filter.rb index 0120cc37d6f..c5a328c21b2 100644 --- a/lib/banzai/filter/inline_metrics_filter.rb +++ b/lib/banzai/filter/inline_metrics_filter.rb @@ -15,17 +15,6 @@ module Banzai ) end - # Endpoint FE should hit to collect the appropriate - # chart information - def metrics_dashboard_url(params) - Gitlab::Metrics::Dashboard::Url.build_dashboard_url( - params['namespace'], - params['project'], - params['environment'], - embedded: true - ) - end - # Search params for selecting metrics links. A few # simple checks is enough to boost performance without # the cost of doing a full regex match. @@ -38,6 +27,28 @@ module Banzai def link_pattern Gitlab::Metrics::Dashboard::Url.regex end + + private + + # Endpoint FE should hit to collect the appropriate + # chart information + def metrics_dashboard_url(params) + Gitlab::Metrics::Dashboard::Url.build_dashboard_url( + params['namespace'], + params['project'], + params['environment'], + embedded: true, + **query_params(params['url']) + ) + end + + # Parses query params out from full url string into hash. + # + # Ex) 'https://<root>/<project>/<environment>/metrics?title=Title&group=Group' + # --> { title: 'Title', group: 'Group' } + def query_params(url) + Gitlab::Metrics::Dashboard::Url.parse_query(url) + end end end end diff --git a/lib/banzai/filter/inline_metrics_redactor_filter.rb b/lib/banzai/filter/inline_metrics_redactor_filter.rb index ff91be2cbb7..4d8a5028898 100644 --- a/lib/banzai/filter/inline_metrics_redactor_filter.rb +++ b/lib/banzai/filter/inline_metrics_redactor_filter.rb @@ -13,8 +13,6 @@ module Banzai # uses to identify the embedded content, removing # only unnecessary nodes. def call - return doc unless Feature.enabled?(:gfm_embedded_metrics, context[:project]) - nodes.each do |node| path = paths_by_node[node] user_has_access = user_access_by_path[path] diff --git a/lib/banzai/filter/issuable_reference_filter.rb b/lib/banzai/filter/issuable_reference_filter.rb index 2963cba91e8..b91ba9f7256 100644 --- a/lib/banzai/filter/issuable_reference_filter.rb +++ b/lib/banzai/filter/issuable_reference_filter.rb @@ -3,22 +3,8 @@ module Banzai module Filter class IssuableReferenceFilter < AbstractReferenceFilter - def records_per_parent - @records_per_project ||= {} - - @records_per_project[object_class.to_s.underscore] ||= begin - hash = Hash.new { |h, k| h[k] = {} } - - parent_per_reference.each do |path, parent| - record_ids = references_per_parent[path] - - parent_records(parent, record_ids).each do |record| - hash[parent][record.iid.to_i] = record - end - end - - hash - end + def record_identifier(record) + record.iid.to_i end def find_object(parent, iid) diff --git a/lib/banzai/filter/redactor_filter.rb b/lib/banzai/filter/reference_redactor_filter.rb index 1f091f594f8..485d3fd5fc7 100644 --- a/lib/banzai/filter/redactor_filter.rb +++ b/lib/banzai/filter/reference_redactor_filter.rb @@ -7,12 +7,12 @@ module Banzai # # Expected to be run in its own post-processing pipeline. # - class RedactorFilter < HTML::Pipeline::Filter + class ReferenceRedactorFilter < HTML::Pipeline::Filter def call unless context[:skip_redaction] context = RenderContext.new(project, current_user) - Redactor.new(context).redact([doc]) + ReferenceRedactor.new(context).redact([doc]) end doc diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index a4a06eae7b7..f57e57890f8 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -2,23 +2,13 @@ module Banzai module Filter - # Sanitize HTML + # Sanitize HTML produced by Markdown. # - # Extends HTML::Pipeline::SanitizationFilter with a custom whitelist. - class SanitizationFilter < HTML::Pipeline::SanitizationFilter - include Gitlab::Utils::StrongMemoize - - UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze + # Extends Banzai::Filter::BaseSanitizationFilter with specific rules. + class SanitizationFilter < Banzai::Filter::BaseSanitizationFilter + # Styles used by Markdown for table alignment TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/.freeze - def whitelist - strong_memoize(:whitelist) do - customize_whitelist(super.deep_dup) - end - end - - private - def customize_whitelist(whitelist) # Allow table alignment; we whitelist specific text-align values in a # transformer below @@ -26,36 +16,9 @@ module Banzai whitelist[:attributes]['td'] = %w(style) whitelist[:css] = { properties: ['text-align'] } - # Allow span elements - whitelist[:elements].push('span') - - # Allow data-math-style attribute in order to support LaTeX formatting - whitelist[:attributes]['code'] = %w(data-math-style) - whitelist[:attributes]['pre'] = %w(data-math-style) - - # Allow html5 details/summary elements - whitelist[:elements].push('details') - whitelist[:elements].push('summary') - - # Allow abbr elements with title attribute - whitelist[:elements].push('abbr') - whitelist[:attributes]['abbr'] = %w(title) - # Allow the 'data-sourcepos' from CommonMark on all elements whitelist[:attributes][:all].push('data-sourcepos') - # Disallow `name` attribute globally, allow on `a` - whitelist[:attributes][:all].delete('name') - whitelist[:attributes]['a'].push('name') - - # Allow any protocol in `a` elements - # and then remove links with unsafe protocols - whitelist[:protocols].delete('a') - whitelist[:transformers].push(self.class.remove_unsafe_links) - - # Remove `rel` attribute from `a` elements - whitelist[:transformers].push(self.class.remove_rel) - # Remove any `style` properties not required for table alignment whitelist[:transformers].push(self.class.remove_unsafe_table_style) @@ -69,43 +32,6 @@ module Banzai end class << self - def remove_unsafe_links - lambda do |env| - node = env[:node] - - return unless node.name == 'a' - return unless node.has_attribute?('href') - - begin - node['href'] = node['href'].strip - uri = Addressable::URI.parse(node['href']) - - return unless uri.scheme - - # Remove all invalid scheme characters before checking against the - # list of unsafe protocols. - # - # See https://tools.ietf.org/html/rfc3986#section-3.1 - scheme = uri.scheme - .strip - .downcase - .gsub(/[^A-Za-z0-9\+\.\-]+/, '') - - node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme) - rescue Addressable::URI::InvalidURIError - node.remove_attribute('href') - end - end - end - - def remove_rel - lambda do |env| - if env[:node_name] == 'a' - env[:node].remove_attribute('rel') - end - end - end - def remove_unsafe_table_style lambda do |env| node = env[:node] diff --git a/lib/banzai/filter/wiki_link_filter.rb b/lib/banzai/filter/wiki_link_filter.rb index 1728a442533..18947679b69 100644 --- a/lib/banzai/filter/wiki_link_filter.rb +++ b/lib/banzai/filter/wiki_link_filter.rb @@ -8,15 +8,19 @@ module Banzai # Context options: # :project_wiki class WikiLinkFilter < HTML::Pipeline::Filter + include Gitlab::Utils::SanitizeNodeLink + def call return doc unless project_wiki? - doc.search('a:not(.gfm)').each { |el| process_link_attr(el.attribute('href')) } - doc.search('video').each { |el| process_link_attr(el.attribute('src')) } + doc.search('a:not(.gfm)').each { |el| process_link(el.attribute('href'), el) } + + doc.search('video').each { |el| process_link(el.attribute('src'), el) } + doc.search('img').each do |el| attr = el.attribute('data-src') || el.attribute('src') - process_link_attr(attr) + process_link(attr, el) end doc @@ -24,6 +28,11 @@ module Banzai protected + def process_link(link_attr, node) + process_link_attr(link_attr) + remove_unsafe_links({ node: node }, remove_invalid_links: false) + end + def project_wiki? !context[:project_wiki].nil? end diff --git a/lib/banzai/filter/wiki_link_filter/rewriter.rb b/lib/banzai/filter/wiki_link_filter/rewriter.rb index 77b5053f38c..f4cc8beeb52 100644 --- a/lib/banzai/filter/wiki_link_filter/rewriter.rb +++ b/lib/banzai/filter/wiki_link_filter/rewriter.rb @@ -4,8 +4,6 @@ module Banzai module Filter class WikiLinkFilter < HTML::Pipeline::Filter class Rewriter - UNSAFE_SLUG_REGEXES = [/\Ajavascript:/i].freeze - def initialize(link_string, wiki:, slug:) @uri = Addressable::URI.parse(link_string) @wiki_base_path = wiki && wiki.wiki_base_path @@ -37,8 +35,6 @@ module Banzai # Of the form `./link`, `../link`, or similar def apply_hierarchical_link_rules! - return if slug_considered_unsafe? - @uri = Addressable::URI.join(@slug, @uri) if @uri.to_s[0] == '.' end @@ -58,10 +54,6 @@ module Banzai def repository_upload? @uri.relative? && @uri.path.starts_with?(Wikis::CreateAttachmentService::ATTACHMENT_PATH) end - - def slug_considered_unsafe? - UNSAFE_SLUG_REGEXES.any? { |r| r.match?(@slug) } - end end end end |