summary | refs | log | tree | commit | diff
path: root/lib/banzai/filter
diff options
context:
space:
mode:
Diffstat (limited to 'lib/banzai/filter')
-rw-r--r-- lib/banzai/filter/abstract_reference_filter.rb | 18
-rw-r--r-- lib/banzai/filter/ascii_doc_sanitization_filter.rb | 111
-rw-r--r-- lib/banzai/filter/autolink_filter.rb | 11
-rw-r--r-- lib/banzai/filter/base_sanitization_filter.rb | 68
-rw-r--r-- lib/banzai/filter/commit_reference_filter.rb | 17
-rw-r--r-- lib/banzai/filter/inline_embeds_filter.rb | 2
-rw-r--r-- lib/banzai/filter/inline_metrics_filter.rb | 33
-rw-r--r-- lib/banzai/filter/inline_metrics_redactor_filter.rb | 2
-rw-r--r-- lib/banzai/filter/issuable_reference_filter.rb | 18
-rw-r--r-- lib/banzai/filter/reference_redactor_filter.rb (renamed from lib/banzai/filter/redactor_filter.rb) | 4
-rw-r--r-- lib/banzai/filter/sanitization_filter.rb | 82
-rw-r--r-- lib/banzai/filter/wiki_link_filter.rb | 15
-rw-r--r-- lib/banzai/filter/wiki_link_filter/rewriter.rb | 8
13 files changed, 253 insertions, 136 deletions
diff --git a/lib/banzai/filter/abstract_reference_filter.rb b/lib/banzai/filter/abstract_reference_filter.rb
index 0224dd8fcd1..52af28ce8ec 100644
--- a/lib/banzai/filter/abstract_reference_filter.rb
+++ b/lib/banzai/filter/abstract_reference_filter.rb
@@ -337,6 +337,24 @@ module Banzai
@current_project_namespace_path ||= project&.namespace&.full_path
end
+ def records_per_parent
+ @_records_per_project ||= {}
+
+ @_records_per_project[object_class.to_s.underscore] ||= begin
+ hash = Hash.new { |h, k| h[k] = {} }
+
+ parent_per_reference.each do |path, parent|
+ record_ids = references_per_parent[path]
+
+ parent_records(parent, record_ids).each do |record|
+ hash[parent][record_identifier(record)] = record
+ end
+ end
+
+ hash
+ end
+ end
+
private
def full_project_path(namespace, project_ref)
diff --git a/lib/banzai/filter/ascii_doc_sanitization_filter.rb b/lib/banzai/filter/ascii_doc_sanitization_filter.rb
new file mode 100644
index 00000000000..9105e86ad04
--- /dev/null
+++ b/lib/banzai/filter/ascii_doc_sanitization_filter.rb
@@ -0,0 +1,111 @@
+# frozen_string_literal: true
+
+module Banzai
+ module Filter
+ # Sanitize HTML produced by AsciiDoc/Asciidoctor.
+ #
+ # Extends Banzai::Filter::BaseSanitizationFilter with specific rules.
+ class AsciiDocSanitizationFilter < Banzai::Filter::BaseSanitizationFilter
+ # Section anchor link pattern
+ SECTION_LINK_REF_PATTERN = /\A#{Gitlab::Asciidoc::DEFAULT_ADOC_ATTRS['idprefix']}(:?[[:alnum:]]|-|_)+\z/.freeze
+ SECTION_HEADINGS = %w(h2 h3 h4 h5 h6).freeze
+
+ # Footnote link patterns
+ FOOTNOTE_LINK_ID_PATTERNS = {
+ a: /\A_footnoteref_\d+\z/,
+ div: /\A_footnotedef_\d+\z/
+ }.freeze
+
+ # Classes used by Asciidoctor to style components
+ ADMONITION_CLASSES = %w(fa icon-note icon-tip icon-warning icon-caution icon-important).freeze
+ CALLOUT_CLASSES = ['conum'].freeze
+ CHECKLIST_CLASSES = %w(fa fa-check-square-o fa-square-o).freeze
+ LIST_CLASSES = %w(checklist none no-bullet unnumbered unstyled).freeze
+
+ ELEMENT_CLASSES_WHITELIST = {
+ span: %w(big small underline overline line-through).freeze,
+ div: ['admonitionblock'].freeze,
+ td: ['icon'].freeze,
+ i: ADMONITION_CLASSES + CALLOUT_CLASSES + CHECKLIST_CLASSES,
+ ul: LIST_CLASSES,
+ ol: LIST_CLASSES,
+ a: ['anchor'].freeze
+ }.freeze
+
+ def customize_whitelist(whitelist)
+ # Allow marks
+ whitelist[:elements].push('mark')
+
+ # Allow any classes in `span`, `i`, `div`, `td`, `ul`, `ol` and `a` elements
+ # but then remove any unknown classes
+ whitelist[:attributes]['span'] = %w(class)
+ whitelist[:attributes]['div'].push('class')
+ whitelist[:attributes]['td'] = %w(class)
+ whitelist[:attributes]['i'] = %w(class)
+ whitelist[:attributes]['ul'] = %w(class)
+ whitelist[:attributes]['ol'] = %w(class)
+ whitelist[:attributes]['a'].push('class')
+ whitelist[:transformers].push(self.class.remove_element_classes)
+
+ # Allow `id` in heading elements for section anchors
+ SECTION_HEADINGS.each do |header|
+ whitelist[:attributes][header] = %w(id)
+ end
+ whitelist[:transformers].push(self.class.remove_non_heading_ids)
+
+ # Allow `id` in footnote elements
+ FOOTNOTE_LINK_ID_PATTERNS.keys.each do |element|
+ whitelist[:attributes][element.to_s].push('id')
+ end
+ whitelist[:transformers].push(self.class.remove_non_footnote_ids)
+
+ whitelist
+ end
+
+ class << self
+ def remove_non_footnote_ids
+ lambda do |env|
+ node = env[:node]
+
+ return unless (pattern = FOOTNOTE_LINK_ID_PATTERNS[node.name.to_sym])
+ return unless node.has_attribute?('id')
+
+ return if node['id'] =~ pattern
+
+ node.remove_attribute('id')
+ end
+ end
+
+ def remove_non_heading_ids
+ lambda do |env|
+ node = env[:node]
+
+ return unless SECTION_HEADINGS.any?(node.name)
+ return unless node.has_attribute?('id')
+
+ return if node['id'] =~ SECTION_LINK_REF_PATTERN
+
+ node.remove_attribute('id')
+ end
+ end
+
+ def remove_element_classes
+ lambda do |env|
+ node = env[:node]
+
+ return unless (classes_whitelist = ELEMENT_CLASSES_WHITELIST[node.name.to_sym])
+ return unless node.has_attribute?('class')
+
+ classes = node['class'].strip.split(' ')
+ allowed_classes = (classes & classes_whitelist)
+ if allowed_classes.empty?
+ node.remove_attribute('class')
+ else
+ node['class'] = allowed_classes.join(' ')
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb
index 56214043d87..5f2cbc24c60 100644
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -18,6 +18,7 @@ module Banzai
#
class AutolinkFilter < HTML::Pipeline::Filter
include ActionView::Helpers::TagHelper
+ include Gitlab::Utils::SanitizeNodeLink
# Pattern to match text that should be autolinked.
#
@@ -72,19 +73,11 @@ module Banzai
private
- # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
- def contains_unsafe?(scheme)
- return false unless scheme
-
- scheme = scheme.strip.downcase
- Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
- end
-
def autolink_match(match)
# start by stripping out dangerous links
begin
uri = Addressable::URI.parse(match)
- return match if contains_unsafe?(uri.scheme)
+ return match unless safe_protocol?(uri.scheme)
rescue Addressable::URI::InvalidURIError
return match
end
diff --git a/lib/banzai/filter/base_sanitization_filter.rb b/lib/banzai/filter/base_sanitization_filter.rb
new file mode 100644
index 00000000000..2dabca3552d
--- /dev/null
+++ b/lib/banzai/filter/base_sanitization_filter.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+module Banzai
+ module Filter
+ # Sanitize HTML produced by markup languages (Markdown, AsciiDoc...).
+ # Specific rules are implemented in dedicated filters:
+ #
+ # - Banzai::Filter::SanitizationFilter (Markdown)
+ # - Banzai::Filter::AsciiDocSanitizationFilter (AsciiDoc/Asciidoctor)
+ #
+ # Extends HTML::Pipeline::SanitizationFilter with common rules.
+ class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter
+ include Gitlab::Utils::StrongMemoize
+ extend Gitlab::Utils::SanitizeNodeLink
+
+ UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
+
+ def whitelist
+ strong_memoize(:whitelist) do
+ whitelist = super.deep_dup
+
+ # Allow span elements
+ whitelist[:elements].push('span')
+
+ # Allow data-math-style attribute in order to support LaTeX formatting
+ whitelist[:attributes]['code'] = %w(data-math-style)
+ whitelist[:attributes]['pre'] = %w(data-math-style)
+
+ # Allow html5 details/summary elements
+ whitelist[:elements].push('details')
+ whitelist[:elements].push('summary')
+
+ # Allow abbr elements with title attribute
+ whitelist[:elements].push('abbr')
+ whitelist[:attributes]['abbr'] = %w(title)
+
+ # Disallow `name` attribute globally, allow on `a`
+ whitelist[:attributes][:all].delete('name')
+ whitelist[:attributes]['a'].push('name')
+
+ # Allow any protocol in `a` elements
+ # and then remove links with unsafe protocols
+ whitelist[:protocols].delete('a')
+ whitelist[:transformers].push(self.class.method(:remove_unsafe_links))
+
+ # Remove `rel` attribute from `a` elements
+ whitelist[:transformers].push(self.class.remove_rel)
+
+ customize_whitelist(whitelist)
+ end
+ end
+
+ def customize_whitelist(whitelist)
+ raise NotImplementedError
+ end
+
+ class << self
+ def remove_rel
+ lambda do |env|
+ if env[:node_name] == 'a'
+ env[:node].remove_attribute('rel')
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/banzai/filter/commit_reference_filter.rb b/lib/banzai/filter/commit_reference_filter.rb
index c3e5ac41cb8..e1d7b36b9a2 100644
--- a/lib/banzai/filter/commit_reference_filter.rb
+++ b/lib/banzai/filter/commit_reference_filter.rb
@@ -19,12 +19,11 @@ module Banzai
end
def find_object(project, id)
- return unless project.is_a?(Project)
+ return unless project.is_a?(Project) && project.valid_repo?
- if project && project.valid_repo?
- # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/43894
- Gitlab::GitalyClient.allow_n_plus_1_calls { project.commit(id) }
- end
+ _, record = records_per_parent[project].detect { |k, _v| Gitlab::Git.shas_eql?(k, id) }
+
+ record
end
def referenced_merge_request_commit_shas
@@ -66,6 +65,14 @@ module Banzai
private
+ def record_identifier(record)
+ record.id
+ end
+
+ def parent_records(parent, ids)
+ parent.commits_by(oids: ids.to_a)
+ end
+
def noteable
context[:noteable]
end
diff --git a/lib/banzai/filter/inline_embeds_filter.rb b/lib/banzai/filter/inline_embeds_filter.rb
index 97394fd8f82..9f1ef0796f0 100644
--- a/lib/banzai/filter/inline_embeds_filter.rb
+++ b/lib/banzai/filter/inline_embeds_filter.rb
@@ -10,8 +10,6 @@ module Banzai
# the link, and insert this node after any html content
# surrounding the link.
def call
- return doc unless Feature.enabled?(:gfm_embedded_metrics, context[:project])
-
doc.xpath(xpath_search).each do |node|
next unless element = element_to_embed(node)
diff --git a/lib/banzai/filter/inline_metrics_filter.rb b/lib/banzai/filter/inline_metrics_filter.rb
index 0120cc37d6f..c5a328c21b2 100644
--- a/lib/banzai/filter/inline_metrics_filter.rb
+++ b/lib/banzai/filter/inline_metrics_filter.rb
@@ -15,17 +15,6 @@ module Banzai
)
end
- # Endpoint FE should hit to collect the appropriate
- # chart information
- def metrics_dashboard_url(params)
- Gitlab::Metrics::Dashboard::Url.build_dashboard_url(
- params['namespace'],
- params['project'],
- params['environment'],
- embedded: true
- )
- end
-
# Search params for selecting metrics links. A few
# simple checks is enough to boost performance without
# the cost of doing a full regex match.
@@ -38,6 +27,28 @@ module Banzai
def link_pattern
Gitlab::Metrics::Dashboard::Url.regex
end
+
+ private
+
+ # Endpoint FE should hit to collect the appropriate
+ # chart information
+ def metrics_dashboard_url(params)
+ Gitlab::Metrics::Dashboard::Url.build_dashboard_url(
+ params['namespace'],
+ params['project'],
+ params['environment'],
+ embedded: true,
+ **query_params(params['url'])
+ )
+ end
+
+ # Parses query params out from full url string into hash.
+ #
+ # Ex) 'https://<root>/<project>/<environment>/metrics?title=Title&group=Group'
+ # --> { title: 'Title', group: 'Group' }
+ def query_params(url)
+ Gitlab::Metrics::Dashboard::Url.parse_query(url)
+ end
end
end
end
diff --git a/lib/banzai/filter/inline_metrics_redactor_filter.rb b/lib/banzai/filter/inline_metrics_redactor_filter.rb
index ff91be2cbb7..4d8a5028898 100644
--- a/lib/banzai/filter/inline_metrics_redactor_filter.rb
+++ b/lib/banzai/filter/inline_metrics_redactor_filter.rb
@@ -13,8 +13,6 @@ module Banzai
# uses to identify the embedded content, removing
# only unnecessary nodes.
def call
- return doc unless Feature.enabled?(:gfm_embedded_metrics, context[:project])
-
nodes.each do |node|
path = paths_by_node[node]
user_has_access = user_access_by_path[path]
diff --git a/lib/banzai/filter/issuable_reference_filter.rb b/lib/banzai/filter/issuable_reference_filter.rb
index 2963cba91e8..b91ba9f7256 100644
--- a/lib/banzai/filter/issuable_reference_filter.rb
+++ b/lib/banzai/filter/issuable_reference_filter.rb
@@ -3,22 +3,8 @@
module Banzai
module Filter
class IssuableReferenceFilter < AbstractReferenceFilter
- def records_per_parent
- @records_per_project ||= {}
-
- @records_per_project[object_class.to_s.underscore] ||= begin
- hash = Hash.new { |h, k| h[k] = {} }
-
- parent_per_reference.each do |path, parent|
- record_ids = references_per_parent[path]
-
- parent_records(parent, record_ids).each do |record|
- hash[parent][record.iid.to_i] = record
- end
- end
-
- hash
- end
+ def record_identifier(record)
+ record.iid.to_i
end
def find_object(parent, iid)
diff --git a/lib/banzai/filter/redactor_filter.rb b/lib/banzai/filter/reference_redactor_filter.rb
index 1f091f594f8..485d3fd5fc7 100644
--- a/lib/banzai/filter/redactor_filter.rb
+++ b/lib/banzai/filter/reference_redactor_filter.rb
@@ -7,12 +7,12 @@ module Banzai
#
# Expected to be run in its own post-processing pipeline.
#
- class RedactorFilter < HTML::Pipeline::Filter
+ class ReferenceRedactorFilter < HTML::Pipeline::Filter
def call
unless context[:skip_redaction]
context = RenderContext.new(project, current_user)
- Redactor.new(context).redact([doc])
+ ReferenceRedactor.new(context).redact([doc])
end
doc
diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb
index a4a06eae7b7..f57e57890f8 100644
--- a/lib/banzai/filter/sanitization_filter.rb
+++ b/lib/banzai/filter/sanitization_filter.rb
@@ -2,23 +2,13 @@
module Banzai
module Filter
- # Sanitize HTML
+ # Sanitize HTML produced by Markdown.
#
- # Extends HTML::Pipeline::SanitizationFilter with a custom whitelist.
- class SanitizationFilter < HTML::Pipeline::SanitizationFilter
- include Gitlab::Utils::StrongMemoize
-
- UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
+ # Extends Banzai::Filter::BaseSanitizationFilter with specific rules.
+ class SanitizationFilter < Banzai::Filter::BaseSanitizationFilter
+ # Styles used by Markdown for table alignment
TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/.freeze
- def whitelist
- strong_memoize(:whitelist) do
- customize_whitelist(super.deep_dup)
- end
- end
-
- private
-
def customize_whitelist(whitelist)
# Allow table alignment; we whitelist specific text-align values in a
# transformer below
@@ -26,36 +16,9 @@ module Banzai
whitelist[:attributes]['td'] = %w(style)
whitelist[:css] = { properties: ['text-align'] }
- # Allow span elements
- whitelist[:elements].push('span')
-
- # Allow data-math-style attribute in order to support LaTeX formatting
- whitelist[:attributes]['code'] = %w(data-math-style)
- whitelist[:attributes]['pre'] = %w(data-math-style)
-
- # Allow html5 details/summary elements
- whitelist[:elements].push('details')
- whitelist[:elements].push('summary')
-
- # Allow abbr elements with title attribute
- whitelist[:elements].push('abbr')
- whitelist[:attributes]['abbr'] = %w(title)
-
# Allow the 'data-sourcepos' from CommonMark on all elements
whitelist[:attributes][:all].push('data-sourcepos')
- # Disallow `name` attribute globally, allow on `a`
- whitelist[:attributes][:all].delete('name')
- whitelist[:attributes]['a'].push('name')
-
- # Allow any protocol in `a` elements
- # and then remove links with unsafe protocols
- whitelist[:protocols].delete('a')
- whitelist[:transformers].push(self.class.remove_unsafe_links)
-
- # Remove `rel` attribute from `a` elements
- whitelist[:transformers].push(self.class.remove_rel)
-
# Remove any `style` properties not required for table alignment
whitelist[:transformers].push(self.class.remove_unsafe_table_style)
@@ -69,43 +32,6 @@ module Banzai
end
class << self
- def remove_unsafe_links
- lambda do |env|
- node = env[:node]
-
- return unless node.name == 'a'
- return unless node.has_attribute?('href')
-
- begin
- node['href'] = node['href'].strip
- uri = Addressable::URI.parse(node['href'])
-
- return unless uri.scheme
-
- # Remove all invalid scheme characters before checking against the
- # list of unsafe protocols.
- #
- # See https://tools.ietf.org/html/rfc3986#section-3.1
- scheme = uri.scheme
- .strip
- .downcase
- .gsub(/[^A-Za-z0-9\+\.\-]+/, '')
-
- node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme)
- rescue Addressable::URI::InvalidURIError
- node.remove_attribute('href')
- end
- end
- end
-
- def remove_rel
- lambda do |env|
- if env[:node_name] == 'a'
- env[:node].remove_attribute('rel')
- end
- end
- end
-
def remove_unsafe_table_style
lambda do |env|
node = env[:node]
diff --git a/lib/banzai/filter/wiki_link_filter.rb b/lib/banzai/filter/wiki_link_filter.rb
index 1728a442533..18947679b69 100644
--- a/lib/banzai/filter/wiki_link_filter.rb
+++ b/lib/banzai/filter/wiki_link_filter.rb
@@ -8,15 +8,19 @@ module Banzai
# Context options:
# :project_wiki
class WikiLinkFilter < HTML::Pipeline::Filter
+ include Gitlab::Utils::SanitizeNodeLink
+
def call
return doc unless project_wiki?
- doc.search('a:not(.gfm)').each { |el| process_link_attr(el.attribute('href')) }
- doc.search('video').each { |el| process_link_attr(el.attribute('src')) }
+ doc.search('a:not(.gfm)').each { |el| process_link(el.attribute('href'), el) }
+
+ doc.search('video').each { |el| process_link(el.attribute('src'), el) }
+
doc.search('img').each do |el|
attr = el.attribute('data-src') || el.attribute('src')
- process_link_attr(attr)
+ process_link(attr, el)
end
doc
@@ -24,6 +28,11 @@ module Banzai
protected
+ def process_link(link_attr, node)
+ process_link_attr(link_attr)
+ remove_unsafe_links({ node: node }, remove_invalid_links: false)
+ end
+
def project_wiki?
!context[:project_wiki].nil?
end
diff --git a/lib/banzai/filter/wiki_link_filter/rewriter.rb b/lib/banzai/filter/wiki_link_filter/rewriter.rb
index 77b5053f38c..f4cc8beeb52 100644
--- a/lib/banzai/filter/wiki_link_filter/rewriter.rb
+++ b/lib/banzai/filter/wiki_link_filter/rewriter.rb
@@ -4,8 +4,6 @@ module Banzai
module Filter
class WikiLinkFilter < HTML::Pipeline::Filter
class Rewriter
- UNSAFE_SLUG_REGEXES = [/\Ajavascript:/i].freeze
-
def initialize(link_string, wiki:, slug:)
@uri = Addressable::URI.parse(link_string)
@wiki_base_path = wiki && wiki.wiki_base_path
@@ -37,8 +35,6 @@ module Banzai
# Of the form `./link`, `../link`, or similar
def apply_hierarchical_link_rules!
- return if slug_considered_unsafe?
-
@uri = Addressable::URI.join(@slug, @uri) if @uri.to_s[0] == '.'
end
@@ -58,10 +54,6 @@ module Banzai
def repository_upload?
@uri.relative? && @uri.path.starts_with?(Wikis::CreateAttachmentService::ATTACHMENT_PATH)
end
-
- def slug_considered_unsafe?
- UNSAFE_SLUG_REGEXES.any? { |r| r.match?(@slug) }
- end
end
end
end