summaryrefslogtreecommitdiff
path: root/lib/banzai/filter/sanitization_filter.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/banzai/filter/sanitization_filter.rb')
-rw-r--r--lib/banzai/filter/sanitization_filter.rb82
1 files changed, 4 insertions, 78 deletions
diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb
index a4a06eae7b7..f57e57890f8 100644
--- a/lib/banzai/filter/sanitization_filter.rb
+++ b/lib/banzai/filter/sanitization_filter.rb
@@ -2,23 +2,13 @@
module Banzai
module Filter
- # Sanitize HTML
+ # Sanitize HTML produced by Markdown.
#
- # Extends HTML::Pipeline::SanitizationFilter with a custom whitelist.
- class SanitizationFilter < HTML::Pipeline::SanitizationFilter
- include Gitlab::Utils::StrongMemoize
-
- UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
+ # Extends Banzai::Filter::BaseSanitizationFilter with specific rules.
+ class SanitizationFilter < Banzai::Filter::BaseSanitizationFilter
+ # Styles used by Markdown for table alignment
TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/.freeze
- def whitelist
- strong_memoize(:whitelist) do
- customize_whitelist(super.deep_dup)
- end
- end
-
- private
-
def customize_whitelist(whitelist)
# Allow table alignment; we whitelist specific text-align values in a
# transformer below
@@ -26,36 +16,9 @@ module Banzai
whitelist[:attributes]['td'] = %w(style)
whitelist[:css] = { properties: ['text-align'] }
- # Allow span elements
- whitelist[:elements].push('span')
-
- # Allow data-math-style attribute in order to support LaTeX formatting
- whitelist[:attributes]['code'] = %w(data-math-style)
- whitelist[:attributes]['pre'] = %w(data-math-style)
-
- # Allow html5 details/summary elements
- whitelist[:elements].push('details')
- whitelist[:elements].push('summary')
-
- # Allow abbr elements with title attribute
- whitelist[:elements].push('abbr')
- whitelist[:attributes]['abbr'] = %w(title)
-
# Allow the 'data-sourcepos' from CommonMark on all elements
whitelist[:attributes][:all].push('data-sourcepos')
- # Disallow `name` attribute globally, allow on `a`
- whitelist[:attributes][:all].delete('name')
- whitelist[:attributes]['a'].push('name')
-
- # Allow any protocol in `a` elements
- # and then remove links with unsafe protocols
- whitelist[:protocols].delete('a')
- whitelist[:transformers].push(self.class.remove_unsafe_links)
-
- # Remove `rel` attribute from `a` elements
- whitelist[:transformers].push(self.class.remove_rel)
-
# Remove any `style` properties not required for table alignment
whitelist[:transformers].push(self.class.remove_unsafe_table_style)
@@ -69,43 +32,6 @@ module Banzai
end
class << self
- def remove_unsafe_links
- lambda do |env|
- node = env[:node]
-
- return unless node.name == 'a'
- return unless node.has_attribute?('href')
-
- begin
- node['href'] = node['href'].strip
- uri = Addressable::URI.parse(node['href'])
-
- return unless uri.scheme
-
- # Remove all invalid scheme characters before checking against the
- # list of unsafe protocols.
- #
- # See https://tools.ietf.org/html/rfc3986#section-3.1
- scheme = uri.scheme
- .strip
- .downcase
- .gsub(/[^A-Za-z0-9\+\.\-]+/, '')
-
- node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme)
- rescue Addressable::URI::InvalidURIError
- node.remove_attribute('href')
- end
- end
- end
-
- def remove_rel
- lambda do |env|
- if env[:node_name] == 'a'
- env[:node].remove_attribute('rel')
- end
- end
- end
-
def remove_unsafe_table_style
lambda do |env|
node = env[:node]