diff options
Diffstat (limited to 'spec/lib/gitlab/markdown/filter/sanitization_filter_spec.rb')
-rw-r--r-- | spec/lib/gitlab/markdown/filter/sanitization_filter_spec.rb | 197 |
1 files changed, 197 insertions, 0 deletions
diff --git a/spec/lib/gitlab/markdown/filter/sanitization_filter_spec.rb b/spec/lib/gitlab/markdown/filter/sanitization_filter_spec.rb new file mode 100644 index 00000000000..a5e5ee0e08a --- /dev/null +++ b/spec/lib/gitlab/markdown/filter/sanitization_filter_spec.rb @@ -0,0 +1,197 @@ +require 'spec_helper' + +describe Gitlab::Markdown::SanitizationFilter, lib: true do + include FilterSpecHelper + + describe 'default whitelist' do + it 'sanitizes tags that are not whitelisted' do + act = %q{<textarea>no inputs</textarea> and <blink>no blinks</blink>} + exp = 'no inputs and no blinks' + expect(filter(act).to_html).to eq exp + end + + it 'sanitizes tag attributes' do + act = %q{<a href="http://example.com/bar.html" onclick="bar">Text</a>} + exp = %q{<a href="http://example.com/bar.html">Text</a>} + expect(filter(act).to_html).to eq exp + end + + it 'sanitizes javascript in attributes' do + act = %q(<a href="javascript:alert('foo')">Text</a>) + exp = '<a>Text</a>' + expect(filter(act).to_html).to eq exp + end + + it 'allows whitelisted HTML tags from the user' do + exp = act = "<dl>\n<dt>Term</dt>\n<dd>Definition</dd>\n</dl>" + expect(filter(act).to_html).to eq exp + end + + it 'sanitizes `class` attribute on any element' do + act = %q{<strong class="foo">Strong</strong>} + expect(filter(act).to_html).to eq %q{<strong>Strong</strong>} + end + + it 'sanitizes `id` attribute on any element' do + act = %q{<em id="foo">Emphasis</em>} + expect(filter(act).to_html).to eq %q{<em>Emphasis</em>} + end + end + + describe 'custom whitelist' do + it 'customizes the whitelist only once' do + instance = described_class.new('Foo') + 3.times { instance.whitelist } + + expect(instance.whitelist[:transformers].size).to eq 5 + end + + it 'allows syntax highlighting' do + exp = act = %q{<pre class="code highlight white c"><code><span class="k">def</span></code></pre>} + expect(filter(act).to_html).to eq exp + end + + it 'sanitizes `class` attribute from non-highlight spans' do + act = %q{<span class="k">def</span>} + expect(filter(act).to_html).to eq %q{<span>def</span>} + end + + it 'allows `style` attribute on table elements' do + html = <<-HTML.strip_heredoc + <table> + <tr><th style="text-align: center">Head</th></tr> + <tr><td style="text-align: right">Body</th></tr> + </table> + HTML + + doc = filter(html) + + expect(doc.at_css('th')['style']).to eq 'text-align: center' + expect(doc.at_css('td')['style']).to eq 'text-align: right' + end + + it 'allows `span` elements' do + exp = act = %q{<span>Hello</span>} + expect(filter(act).to_html).to eq exp + end + + it 'removes `rel` attribute from `a` elements' do + act = %q{<a href="#" rel="nofollow">Link</a>} + exp = %q{<a href="#">Link</a>} + + expect(filter(act).to_html).to eq exp + end + + # Adapted from the Sanitize test suite: http://git.io/vczrM + protocols = { + 'protocol-based JS injection: simple, no spaces' => { + input: '<a href="javascript:alert(\'XSS\');">foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: simple, spaces before' => { + input: '<a href="javascript :alert(\'XSS\');">foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: simple, spaces after' => { + input: '<a href="javascript: alert(\'XSS\');">foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: simple, spaces before and after' => { + input: '<a href="javascript : alert(\'XSS\');">foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: preceding colon' => { + input: '<a href=":javascript:alert(\'XSS\');">foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: UTF-8 encoding' => { + input: '<a href="javascript:">foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: long UTF-8 encoding' => { + input: '<a href="javascript:">foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: long UTF-8 encoding without semicolons' => { + input: '<a href=javascript:alert('XSS')>foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: hex encoding' => { + input: '<a href="javascript:">foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: long hex encoding' => { + input: '<a href="javascript:">foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: hex encoding without semicolons' => { + input: '<a href=javascript:alert('XSS')>foo</a>', + output: '<a>foo</a>' + }, + + 'protocol-based JS injection: null char' => { + input: "<a href=java\0script:alert(\"XSS\")>foo</a>", + output: '<a href="java"></a>' + }, + + 'protocol-based JS injection: spaces and entities' => { + input: '<a href="  javascript:alert(\'XSS\');">foo</a>', + output: '<a href="">foo</a>' + }, + } + + protocols.each do |name, data| + it "handles #{name}" do + doc = filter(data[:input]) + + expect(doc.to_html).to eq data[:output] + end + end + + it 'allows non-standard anchor schemes' do + exp = %q{<a href="irc://irc.freenode.net/git">IRC</a>} + act = filter(exp) + + expect(act.to_html).to eq exp + end + + it 'allows relative links' do + exp = %q{<a href="foo/bar.md">foo/bar.md</a>} + act = filter(exp) + + expect(act.to_html).to eq exp + end + end + + context 'when inline_sanitization is true' do + it 'uses a stricter whitelist' do + doc = filter('<h1>Description</h1>', inline_sanitization: true) + expect(doc.to_html.strip).to eq 'Description' + end + + %w(pre code img ol ul li).each do |elem| + it "removes '#{elem}' elements" do + act = "<#{elem}>Description</#{elem}>" + expect(filter(act, inline_sanitization: true).to_html.strip). + to eq 'Description' + end + end + + %w(b i strong em a ins del sup sub p).each do |elem| + it "still allows '#{elem}' elements" do + exp = act = "<#{elem}>Description</#{elem}>" + expect(filter(act, inline_sanitization: true).to_html).to eq exp + end + end + end +end |