blob: a4a06eae7b7f3e0c37167d043d4f6e9e7edaf8ca (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
# frozen_string_literal: true
module Banzai
module Filter
# Sanitize HTML
#
# Extends HTML::Pipeline::SanitizationFilter with a custom whitelist.
class SanitizationFilter < HTML::Pipeline::SanitizationFilter
include Gitlab::Utils::StrongMemoize
UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/.freeze
def whitelist
strong_memoize(:whitelist) do
customize_whitelist(super.deep_dup)
end
end
private
def customize_whitelist(whitelist)
# Allow table alignment; we whitelist specific text-align values in a
# transformer below
whitelist[:attributes]['th'] = %w(style)
whitelist[:attributes]['td'] = %w(style)
whitelist[:css] = { properties: ['text-align'] }
# Allow span elements
whitelist[:elements].push('span')
# Allow data-math-style attribute in order to support LaTeX formatting
whitelist[:attributes]['code'] = %w(data-math-style)
whitelist[:attributes]['pre'] = %w(data-math-style)
# Allow html5 details/summary elements
whitelist[:elements].push('details')
whitelist[:elements].push('summary')
# Allow abbr elements with title attribute
whitelist[:elements].push('abbr')
whitelist[:attributes]['abbr'] = %w(title)
# Allow the 'data-sourcepos' from CommonMark on all elements
whitelist[:attributes][:all].push('data-sourcepos')
# Disallow `name` attribute globally, allow on `a`
whitelist[:attributes][:all].delete('name')
whitelist[:attributes]['a'].push('name')
# Allow any protocol in `a` elements
# and then remove links with unsafe protocols
whitelist[:protocols].delete('a')
whitelist[:transformers].push(self.class.remove_unsafe_links)
# Remove `rel` attribute from `a` elements
whitelist[:transformers].push(self.class.remove_rel)
# Remove any `style` properties not required for table alignment
whitelist[:transformers].push(self.class.remove_unsafe_table_style)
# Allow `id` in a and li elements for footnotes
# and remove any `id` properties not matching for footnotes
whitelist[:attributes]['a'].push('id')
whitelist[:attributes]['li'] = %w(id)
whitelist[:transformers].push(self.class.remove_non_footnote_ids)
whitelist
end
class << self
def remove_unsafe_links
lambda do |env|
node = env[:node]
return unless node.name == 'a'
return unless node.has_attribute?('href')
begin
node['href'] = node['href'].strip
uri = Addressable::URI.parse(node['href'])
return unless uri.scheme
# Remove all invalid scheme characters before checking against the
# list of unsafe protocols.
#
# See https://tools.ietf.org/html/rfc3986#section-3.1
scheme = uri.scheme
.strip
.downcase
.gsub(/[^A-Za-z0-9\+\.\-]+/, '')
node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme)
rescue Addressable::URI::InvalidURIError
node.remove_attribute('href')
end
end
end
def remove_rel
lambda do |env|
if env[:node_name] == 'a'
env[:node].remove_attribute('rel')
end
end
end
def remove_unsafe_table_style
lambda do |env|
node = env[:node]
return unless node.name == 'th' || node.name == 'td'
return unless node.has_attribute?('style')
if node['style'] =~ TABLE_ALIGNMENT_PATTERN
node['style'] = "text-align: #{$~[:alignment]}"
else
node.remove_attribute('style')
end
end
end
def remove_non_footnote_ids
lambda do |env|
node = env[:node]
return unless node.name == 'a' || node.name == 'li'
return unless node.has_attribute?('id')
return if node.name == 'a' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LINK_REFERENCE_PATTERN
return if node.name == 'li' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LI_REFERENCE_PATTERN
node.remove_attribute('id')
end
end
end
end
end
end
|