diff options
author | Guillaume Grossetie <ggrossetie@gmail.com> | 2019-07-03 10:53:00 +0200 |
---|---|---|
committer | Guillaume Grossetie <ggrossetie@gmail.com> | 2019-07-12 09:35:50 +0200 |
commit | a546b9fbc5abdb010c19a2fb24e8df50001374f7 (patch) | |
tree | 794bd6bc4b055eef18046ca945936648ae055138 /lib/banzai | |
parent | dece84065f9dee04661e54af4f7016e7c50b63a6 (diff) | |
download | gitlab-ce-a546b9fbc5abdb010c19a2fb24e8df50001374f7.tar.gz |
Prevent excessive sanitization of AsciiDoc output
Diffstat (limited to 'lib/banzai')
-rw-r--r-- | lib/banzai/filter/ascii_doc_sanitization_filter.rb | 62 | ||||
-rw-r--r-- | lib/banzai/filter/base_sanitization_filter.rb | 96 | ||||
-rw-r--r-- | lib/banzai/filter/sanitization_filter.rb | 82 | ||||
-rw-r--r-- | lib/banzai/pipeline/ascii_doc_pipeline.rb | 2 |
4 files changed, 163 insertions, 79 deletions
diff --git a/lib/banzai/filter/ascii_doc_sanitization_filter.rb b/lib/banzai/filter/ascii_doc_sanitization_filter.rb new file mode 100644 index 00000000000..a78bb60103c --- /dev/null +++ b/lib/banzai/filter/ascii_doc_sanitization_filter.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +module Banzai + module Filter + # Sanitize HTML produced by AsciiDoc/Asciidoctor. + # + # Extends Banzai::Filter::BaseSanitizationFilter with specific rules. + class AsciiDocSanitizationFilter < Banzai::Filter::BaseSanitizationFilter + # Classes used by Asciidoctor to style components + ADMONITION_CLASSES = %w(fa icon-note icon-tip icon-warning icon-caution icon-important).freeze + CALLOUT_CLASSES = ['conum'].freeze + CHECKLIST_CLASSES = %w(fa fa-check-square-o fa-square-o).freeze + + LIST_CLASSES = %w(checklist none no-bullet unnumbered unstyled).freeze + + ELEMENT_CLASSES_WHITELIST = { + span: %w(big small underline overline line-through).freeze, + div: ['admonitionblock'].freeze, + td: ['icon'].freeze, + i: ADMONITION_CLASSES + CALLOUT_CLASSES + CHECKLIST_CLASSES, + ul: LIST_CLASSES, + ol: LIST_CLASSES + }.freeze + + def customize_whitelist(whitelist) + # Allow marks + whitelist[:elements].push('mark') + + # Allow any classes in `span`, `i`, `div`, `td`, `ul` and `ol` elements + # but then remove any unknown classes + whitelist[:attributes]['span'] = %w(class) + whitelist[:attributes]['div'].push('class') + whitelist[:attributes]['td'] = %w(class) + whitelist[:attributes]['i'] = %w(class) + whitelist[:attributes]['ul'] = %w(class) + whitelist[:attributes]['ol'] = %w(class) + whitelist[:transformers].push(self.class.remove_element_classes) + + whitelist + end + + class << self + def remove_element_classes + lambda do |env| + node = env[:node] + + return unless (classes_whitelist = ELEMENT_CLASSES_WHITELIST[node.name.to_sym]) + return unless node.has_attribute?('class') + + classes = node['class'].strip.split(' ') + allowed_classes = (classes & classes_whitelist) + if 
allowed_classes.empty? + node.remove_attribute('class') + else + node['class'] = allowed_classes.join(' ') + end + end + end + end + end + end +end diff --git a/lib/banzai/filter/base_sanitization_filter.rb b/lib/banzai/filter/base_sanitization_filter.rb new file mode 100644 index 00000000000..420e92cb1e8 --- /dev/null +++ b/lib/banzai/filter/base_sanitization_filter.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +module Banzai + module Filter + # Sanitize HTML produced by markup languages (Markdown, AsciiDoc...). + # Specific rules are implemented in dedicated filters: + # + # - Banzai::Filter::SanitizationFilter (Markdown) + # - Banzai::Filter::AsciiDocSanitizationFilter (AsciiDoc/Asciidoctor) + # + # Extends HTML::Pipeline::SanitizationFilter with common rules. + class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter + include Gitlab::Utils::StrongMemoize + + UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze + + def whitelist + strong_memoize(:whitelist) do + whitelist = super.deep_dup + + # Allow span elements + whitelist[:elements].push('span') + + # Allow data-math-style attribute in order to support LaTeX formatting + whitelist[:attributes]['code'] = %w(data-math-style) + whitelist[:attributes]['pre'] = %w(data-math-style) + + # Allow html5 details/summary elements + whitelist[:elements].push('details') + whitelist[:elements].push('summary') + + # Allow abbr elements with title attribute + whitelist[:elements].push('abbr') + whitelist[:attributes]['abbr'] = %w(title) + + # Disallow `name` attribute globally, allow on `a` + whitelist[:attributes][:all].delete('name') + whitelist[:attributes]['a'].push('name') + + # Allow any protocol in `a` elements + # and then remove links with unsafe protocols + whitelist[:protocols].delete('a') + whitelist[:transformers].push(self.class.remove_unsafe_links) + + # Remove `rel` attribute from `a` elements + whitelist[:transformers].push(self.class.remove_rel) + + customize_whitelist(whitelist) + end + 
end + + def customize_whitelist(whitelist) + raise NotImplementedError + end + + class << self + def remove_unsafe_links + lambda do |env| + node = env[:node] + + return unless node.name == 'a' + return unless node.has_attribute?('href') + + begin + node['href'] = node['href'].strip + uri = Addressable::URI.parse(node['href']) + + return unless uri.scheme + + # Remove all invalid scheme characters before checking against the + # list of unsafe protocols. + # + # See https://tools.ietf.org/html/rfc3986#section-3.1 + scheme = uri.scheme + .strip + .downcase + .gsub(/[^A-Za-z0-9\+\.\-]+/, '') + + node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme) + rescue Addressable::URI::InvalidURIError + node.remove_attribute('href') + end + end + end + + def remove_rel + lambda do |env| + if env[:node_name] == 'a' + env[:node].remove_attribute('rel') + end + end + end + end + end + end +end diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index a4a06eae7b7..f57e57890f8 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -2,23 +2,13 @@ module Banzai module Filter - # Sanitize HTML + # Sanitize HTML produced by Markdown. # - # Extends HTML::Pipeline::SanitizationFilter with a custom whitelist. - class SanitizationFilter < HTML::Pipeline::SanitizationFilter - include Gitlab::Utils::StrongMemoize - - UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze + # Extends Banzai::Filter::BaseSanitizationFilter with specific rules. 
+ class SanitizationFilter < Banzai::Filter::BaseSanitizationFilter + # Styles used by Markdown for table alignment TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/.freeze - def whitelist - strong_memoize(:whitelist) do - customize_whitelist(super.deep_dup) - end - end - - private - def customize_whitelist(whitelist) # Allow table alignment; we whitelist specific text-align values in a # transformer below @@ -26,36 +16,9 @@ module Banzai whitelist[:attributes]['td'] = %w(style) whitelist[:css] = { properties: ['text-align'] } - # Allow span elements - whitelist[:elements].push('span') - - # Allow data-math-style attribute in order to support LaTeX formatting - whitelist[:attributes]['code'] = %w(data-math-style) - whitelist[:attributes]['pre'] = %w(data-math-style) - - # Allow html5 details/summary elements - whitelist[:elements].push('details') - whitelist[:elements].push('summary') - - # Allow abbr elements with title attribute - whitelist[:elements].push('abbr') - whitelist[:attributes]['abbr'] = %w(title) - # Allow the 'data-sourcepos' from CommonMark on all elements whitelist[:attributes][:all].push('data-sourcepos') - # Disallow `name` attribute globally, allow on `a` - whitelist[:attributes][:all].delete('name') - whitelist[:attributes]['a'].push('name') - - # Allow any protocol in `a` elements - # and then remove links with unsafe protocols - whitelist[:protocols].delete('a') - whitelist[:transformers].push(self.class.remove_unsafe_links) - - # Remove `rel` attribute from `a` elements - whitelist[:transformers].push(self.class.remove_rel) - # Remove any `style` properties not required for table alignment whitelist[:transformers].push(self.class.remove_unsafe_table_style) @@ -69,43 +32,6 @@ module Banzai end class << self - def remove_unsafe_links - lambda do |env| - node = env[:node] - - return unless node.name == 'a' - return unless node.has_attribute?('href') - - begin - node['href'] = node['href'].strip - uri = 
Addressable::URI.parse(node['href']) - - return unless uri.scheme - - # Remove all invalid scheme characters before checking against the - # list of unsafe protocols. - # - # See https://tools.ietf.org/html/rfc3986#section-3.1 - scheme = uri.scheme - .strip - .downcase - .gsub(/[^A-Za-z0-9\+\.\-]+/, '') - - node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme) - rescue Addressable::URI::InvalidURIError - node.remove_attribute('href') - end - end - end - - def remove_rel - lambda do |env| - if env[:node_name] == 'a' - env[:node].remove_attribute('rel') - end - end - end - def remove_unsafe_table_style lambda do |env| node = env[:node] diff --git a/lib/banzai/pipeline/ascii_doc_pipeline.rb b/lib/banzai/pipeline/ascii_doc_pipeline.rb index 6be489c6572..d25b74b23b2 100644 --- a/lib/banzai/pipeline/ascii_doc_pipeline.rb +++ b/lib/banzai/pipeline/ascii_doc_pipeline.rb @@ -5,7 +5,7 @@ module Banzai class AsciiDocPipeline < BasePipeline def self.filters FilterArray[ - Filter::SanitizationFilter, + Filter::AsciiDocSanitizationFilter, Filter::SyntaxHighlightFilter, Filter::ExternalLinkFilter, Filter::PlantumlFilter, |