diff options
author | Alex Groleau <agroleau@gitlab.com> | 2019-08-27 12:41:39 -0400 |
---|---|---|
committer | Alex Groleau <agroleau@gitlab.com> | 2019-08-27 12:41:39 -0400 |
commit | aa01f092829facd1044ad02f334422b7dbdc8b0e (patch) | |
tree | a754bf2497820432df7da0f2108bb7527a8dd7b8 /lib/banzai/filter/sanitization_filter.rb | |
parent | a1d9c9994a9a4d79b824c3fd9322688303ac8b03 (diff) | |
parent | 6b10779053ff4233c7a64c5ab57754fce63f6710 (diff) | |
download | gitlab-ce-runner-metrics-extractor.tar.gz |
Merge branch 'master' of gitlab_gitlab:gitlab-org/gitlab-ce runner-metrics-extractor
Diffstat (limited to 'lib/banzai/filter/sanitization_filter.rb')
-rw-r--r-- | lib/banzai/filter/sanitization_filter.rb | 82 |
1 files changed, 4 insertions, 78 deletions
diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index a4a06eae7b7..f57e57890f8 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -2,23 +2,13 @@ module Banzai module Filter - # Sanitize HTML + # Sanitize HTML produced by Markdown. # - # Extends HTML::Pipeline::SanitizationFilter with a custom whitelist. - class SanitizationFilter < HTML::Pipeline::SanitizationFilter - include Gitlab::Utils::StrongMemoize - - UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze + # Extends Banzai::Filter::BaseSanitizationFilter with specific rules. + class SanitizationFilter < Banzai::Filter::BaseSanitizationFilter + # Styles used by Markdown for table alignment TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/.freeze - def whitelist - strong_memoize(:whitelist) do - customize_whitelist(super.deep_dup) - end - end - - private - def customize_whitelist(whitelist) # Allow table alignment; we whitelist specific text-align values in a # transformer below @@ -26,36 +16,9 @@ module Banzai whitelist[:attributes]['td'] = %w(style) whitelist[:css] = { properties: ['text-align'] } - # Allow span elements - whitelist[:elements].push('span') - - # Allow data-math-style attribute in order to support LaTeX formatting - whitelist[:attributes]['code'] = %w(data-math-style) - whitelist[:attributes]['pre'] = %w(data-math-style) - - # Allow html5 details/summary elements - whitelist[:elements].push('details') - whitelist[:elements].push('summary') - - # Allow abbr elements with title attribute - whitelist[:elements].push('abbr') - whitelist[:attributes]['abbr'] = %w(title) - # Allow the 'data-sourcepos' from CommonMark on all elements whitelist[:attributes][:all].push('data-sourcepos') - # Disallow `name` attribute globally, allow on `a` - whitelist[:attributes][:all].delete('name') - whitelist[:attributes]['a'].push('name') - - # Allow any protocol in `a` elements - # and then 
remove links with unsafe protocols - whitelist[:protocols].delete('a') - whitelist[:transformers].push(self.class.remove_unsafe_links) - - # Remove `rel` attribute from `a` elements - whitelist[:transformers].push(self.class.remove_rel) - # Remove any `style` properties not required for table alignment whitelist[:transformers].push(self.class.remove_unsafe_table_style) @@ -69,43 +32,6 @@ module Banzai end class << self - def remove_unsafe_links - lambda do |env| - node = env[:node] - - return unless node.name == 'a' - return unless node.has_attribute?('href') - - begin - node['href'] = node['href'].strip - uri = Addressable::URI.parse(node['href']) - - return unless uri.scheme - - # Remove all invalid scheme characters before checking against the - # list of unsafe protocols. - # - # See https://tools.ietf.org/html/rfc3986#section-3.1 - scheme = uri.scheme - .strip - .downcase - .gsub(/[^A-Za-z0-9\+\.\-]+/, '') - - node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme) - rescue Addressable::URI::InvalidURIError - node.remove_attribute('href') - end - end - end - - def remove_rel - lambda do |env| - if env[:node_name] == 'a' - env[:node].remove_attribute('rel') - end - end - end - def remove_unsafe_table_style lambda do |env| node = env[:node] |