summaryrefslogtreecommitdiff
path: root/lib/banzai/filter/ascii_doc_sanitization_filter.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/banzai/filter/ascii_doc_sanitization_filter.rb')
-rw-r--r--lib/banzai/filter/ascii_doc_sanitization_filter.rb111
1 files changed, 111 insertions, 0 deletions
diff --git a/lib/banzai/filter/ascii_doc_sanitization_filter.rb b/lib/banzai/filter/ascii_doc_sanitization_filter.rb
new file mode 100644
index 00000000000..9105e86ad04
--- /dev/null
+++ b/lib/banzai/filter/ascii_doc_sanitization_filter.rb
@@ -0,0 +1,111 @@
+# frozen_string_literal: true
+
+module Banzai
+ module Filter
+ # Sanitize HTML produced by AsciiDoc/Asciidoctor.
+ #
+ # Extends Banzai::Filter::BaseSanitizationFilter with specific rules.
+ class AsciiDocSanitizationFilter < Banzai::Filter::BaseSanitizationFilter
+ # Section anchor link pattern
+ SECTION_LINK_REF_PATTERN = /\A#{Gitlab::Asciidoc::DEFAULT_ADOC_ATTRS['idprefix']}(:?[[:alnum:]]|-|_)+\z/.freeze
+ SECTION_HEADINGS = %w(h2 h3 h4 h5 h6).freeze
+
+ # Footnote link patterns
+ FOOTNOTE_LINK_ID_PATTERNS = {
+ a: /\A_footnoteref_\d+\z/,
+ div: /\A_footnotedef_\d+\z/
+ }.freeze
+
+ # Classes used by Asciidoctor to style components
+ ADMONITION_CLASSES = %w(fa icon-note icon-tip icon-warning icon-caution icon-important).freeze
+ CALLOUT_CLASSES = ['conum'].freeze
+ CHECKLIST_CLASSES = %w(fa fa-check-square-o fa-square-o).freeze
+ LIST_CLASSES = %w(checklist none no-bullet unnumbered unstyled).freeze
+
+ ELEMENT_CLASSES_WHITELIST = {
+ span: %w(big small underline overline line-through).freeze,
+ div: ['admonitionblock'].freeze,
+ td: ['icon'].freeze,
+ i: ADMONITION_CLASSES + CALLOUT_CLASSES + CHECKLIST_CLASSES,
+ ul: LIST_CLASSES,
+ ol: LIST_CLASSES,
+ a: ['anchor'].freeze
+ }.freeze
+
+ def customize_whitelist(whitelist)
+ # Allow marks
+ whitelist[:elements].push('mark')
+
+ # Allow any classes in `span`, `i`, `div`, `td`, `ul`, `ol` and `a` elements
+ # but then remove any unknown classes
+ whitelist[:attributes]['span'] = %w(class)
+ whitelist[:attributes]['div'].push('class')
+ whitelist[:attributes]['td'] = %w(class)
+ whitelist[:attributes]['i'] = %w(class)
+ whitelist[:attributes]['ul'] = %w(class)
+ whitelist[:attributes]['ol'] = %w(class)
+ whitelist[:attributes]['a'].push('class')
+ whitelist[:transformers].push(self.class.remove_element_classes)
+
+ # Allow `id` in heading elements for section anchors
+ SECTION_HEADINGS.each do |header|
+ whitelist[:attributes][header] = %w(id)
+ end
+ whitelist[:transformers].push(self.class.remove_non_heading_ids)
+
+ # Allow `id` in footnote elements
+ FOOTNOTE_LINK_ID_PATTERNS.keys.each do |element|
+ whitelist[:attributes][element.to_s].push('id')
+ end
+ whitelist[:transformers].push(self.class.remove_non_footnote_ids)
+
+ whitelist
+ end
+
+ class << self
+ def remove_non_footnote_ids
+ lambda do |env|
+ node = env[:node]
+
+ return unless (pattern = FOOTNOTE_LINK_ID_PATTERNS[node.name.to_sym])
+ return unless node.has_attribute?('id')
+
+ return if node['id'] =~ pattern
+
+ node.remove_attribute('id')
+ end
+ end
+
+ def remove_non_heading_ids
+ lambda do |env|
+ node = env[:node]
+
+ return unless SECTION_HEADINGS.any?(node.name)
+ return unless node.has_attribute?('id')
+
+ return if node['id'] =~ SECTION_LINK_REF_PATTERN
+
+ node.remove_attribute('id')
+ end
+ end
+
+ def remove_element_classes
+ lambda do |env|
+ node = env[:node]
+
+ return unless (classes_whitelist = ELEMENT_CLASSES_WHITELIST[node.name.to_sym])
+ return unless node.has_attribute?('class')
+
+ classes = node['class'].strip.split(' ')
+ allowed_classes = (classes & classes_whitelist)
+ if allowed_classes.empty?
+ node.remove_attribute('class')
+ else
+ node['class'] = allowed_classes.join(' ')
+ end
+ end
+ end
+ end
+ end
+ end
+end