diff options
Diffstat (limited to 'lib/banzai/filter/base_sanitization_filter.rb')
-rw-r--r-- | lib/banzai/filter/base_sanitization_filter.rb | 96 |
1 files changed, 96 insertions, 0 deletions
# frozen_string_literal: true

module Banzai
  module Filter
    # Common base for the filters that sanitize HTML rendered from markup
    # languages (Markdown, AsciiDoc...).
    #
    # Markup-specific rules live in the dedicated filters:
    #
    # - Banzai::Filter::SanitizationFilter (Markdown)
    # - Banzai::Filter::AsciiDocSanitizationFilter (AsciiDoc/Asciidoctor)
    #
    # Builds on HTML::Pipeline::SanitizationFilter and layers the shared
    # rules on top of its whitelist.
    class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter
      include Gitlab::Utils::StrongMemoize

      # URI schemes whose presence causes an `a` element to lose its `href`.
      UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze

      # Sanitization rules for this filter, memoized via `strong_memoize`.
      #
      # Works on a deep copy of the parent class's whitelist, applies the
      # common adjustments below and finally hands the result to
      # `customize_whitelist`, whose return value is the block's result.
      def whitelist
        strong_memoize(:whitelist) do
          rules = super.deep_dup

          # Extra elements: span, html5 details/summary, and abbr
          rules[:elements].push('span', 'details', 'summary', 'abbr')

          # data-math-style on code/pre is needed to support LaTeX formatting
          %w(code pre).each do |tag|
            rules[:attributes][tag] = %w(data-math-style)
          end

          # abbr elements may carry a title attribute
          rules[:attributes]['abbr'] = %w(title)

          # `name` is dropped from the global attribute list and permitted
          # on `a` elements only
          rules[:attributes][:all].delete('name')
          rules[:attributes]['a'].push('name')

          # Lift the protocol restriction on `a` elements; links using an
          # unsafe protocol are stripped by the first transformer instead.
          # The second transformer removes `rel` from `a` elements.
          rules[:protocols].delete('a')
          rules[:transformers].push(
            self.class.remove_unsafe_links,
            self.class.remove_rel
          )

          customize_whitelist(rules)
        end
      end

      # Hook for subclasses to apply their own rules to the whitelist.
      # `whitelist` uses the return value of this method as its result.
      #
      # Raises NotImplementedError in this base class.
      def customize_whitelist(whitelist)
        raise NotImplementedError
      end

      class << self
        # Returns a transformer lambda that removes the `href` attribute of
        # `a` elements when the link's scheme is listed in UNSAFE_PROTOCOLS
        # or when the link cannot be parsed as a URI.
        #
        # The `href` value is also rewritten with surrounding whitespace
        # stripped.
        def remove_unsafe_links
          lambda do |env|
            anchor = env[:node]

            return unless anchor.name == 'a' && anchor.has_attribute?('href')

            begin
              anchor['href'] = anchor['href'].strip
              parsed = Addressable::URI.parse(anchor['href'])
              raw_scheme = parsed.scheme

              return unless raw_scheme

              # Drop every character that is not valid in a scheme before
              # comparing against the list of unsafe protocols.
              #
              # See https://tools.ietf.org/html/rfc3986#section-3.1
              normalized = raw_scheme.strip.downcase.gsub(/[^A-Za-z0-9\+\.\-]+/, '')

              anchor.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(normalized)
            rescue Addressable::URI::InvalidURIError
              anchor.remove_attribute('href')
            end
          end
        end

        # Returns a transformer lambda that removes the `rel` attribute
        # from `a` elements.
        def remove_rel
          lambda do |env|
            env[:node].remove_attribute('rel') if env[:node_name] == 'a'
          end
        end
      end
    end
  end
end