diff options
-rw-r--r-- | app/helpers/gitlab_markdown_helper.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/markdown.rb | 34 | ||||
-rw-r--r-- | lib/gitlab/markdown/sanitization_filter.rb | 38 | ||||
-rw-r--r-- | spec/helpers/gitlab_markdown_helper_spec.rb | 33 | ||||
-rw-r--r-- | spec/lib/gitlab/markdown/sanitization_filter_spec.rb | 81 |
5 files changed, 123 insertions, 65 deletions
diff --git a/app/helpers/gitlab_markdown_helper.rb b/app/helpers/gitlab_markdown_helper.rb index 7dbffaae5f9..24263a0f619 100644 --- a/app/helpers/gitlab_markdown_helper.rb +++ b/app/helpers/gitlab_markdown_helper.rb @@ -34,7 +34,7 @@ module GitlabMarkdownHelper # see https://github.com/vmg/redcarpet#darling-i-packed-you-a-couple-renderers-for-lunch rend = Redcarpet::Render::GitlabHTML.new(self, user_color_scheme_class, { - # Handled further down the line by HTML::Pipeline::SanitizationFilter + # Handled further down the line by Gitlab::Markdown::SanitizationFilter escape_html: false }.merge(options)) diff --git a/lib/gitlab/markdown.rb b/lib/gitlab/markdown.rb index beb97bbdf41..e7ddaab5c2a 100644 --- a/lib/gitlab/markdown.rb +++ b/lib/gitlab/markdown.rb @@ -38,6 +38,7 @@ module Gitlab autoload :IssueReferenceFilter, 'gitlab/markdown/issue_reference_filter' autoload :LabelReferenceFilter, 'gitlab/markdown/label_reference_filter' autoload :MergeRequestReferenceFilter, 'gitlab/markdown/merge_request_reference_filter' + autoload :SanitizationFilter, 'gitlab/markdown/sanitization_filter' autoload :SnippetReferenceFilter, 'gitlab/markdown/snippet_reference_filter' autoload :TableOfContentsFilter, 'gitlab/markdown/table_of_contents_filter' autoload :UserReferenceFilter, 'gitlab/markdown/user_reference_filter' @@ -76,9 +77,6 @@ module Gitlab pipeline = HTML::Pipeline.new(filters) context = { - # SanitizationFilter - whitelist: sanitization_whitelist, - # EmojiFilter asset_root: Gitlab.config.gitlab.url, asset_host: Gitlab::Application.config.asset_host, @@ -116,10 +114,10 @@ module Gitlab # SanitizationFilter should come first so that all generated reference HTML # goes through untouched. # - # See https://gitlab.com/gitlab-org/html-pipeline-gitlab for more filters + # See https://github.com/jch/html-pipeline#filters for more filters. def filters [ - HTML::Pipeline::SanitizationFilter, + Gitlab::Markdown::SanitizationFilter, Gitlab::Markdown::EmojiFilter, Gitlab::Markdown::TableOfContentsFilter, @@ -136,32 +134,6 @@ module Gitlab ] end - # Customize the SanitizationFilter whitelist - # - # - Allow `class` and `id` attributes on all elements - # - Allow `span` elements - # - Remove `rel` attributes from `a` elements - # - Remove `a` nodes with `javascript:` in the `href` attribute - def sanitization_whitelist - whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST - whitelist[:attributes][:all].push('class', 'id') - whitelist[:elements].push('span') - - fix_anchors = lambda do |env| - name, node = env[:node_name], env[:node] - if name == 'a' - node.remove_attribute('rel') - if node['href'] && node['href'].match('javascript:') - node.remove_attribute('href') - end - end - end - - whitelist[:transformers].push(fix_anchors) - - whitelist - end - # Turn list items that start with "[ ]" into HTML checkbox inputs. def parse_tasks(text) li_tag = '<li class="task-list-item">' diff --git a/lib/gitlab/markdown/sanitization_filter.rb b/lib/gitlab/markdown/sanitization_filter.rb new file mode 100644 index 00000000000..9a154e0b2fe --- /dev/null +++ b/lib/gitlab/markdown/sanitization_filter.rb @@ -0,0 +1,38 @@ +require 'html/pipeline/filter' +require 'html/pipeline/sanitization_filter' + +module Gitlab + module Markdown + # Sanitize HTML + # + # Extends HTML::Pipeline::SanitizationFilter with a custom whitelist. + class SanitizationFilter < HTML::Pipeline::SanitizationFilter + def whitelist + whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST + + # Allow `class` and `id` on all elements + whitelist[:attributes][:all].push('class', 'id') + + # Allow table alignment + whitelist[:attributes]['th'] = %w(style) + whitelist[:attributes]['td'] = %w(style) + + # Allow span elements + whitelist[:elements].push('span') + + # Remove `rel` attribute from `a` elements + whitelist[:transformers].push(remove_rel) + + whitelist + end + + def remove_rel + lambda do |env| + if env[:node_name] == 'a' + env[:node].remove_attribute('rel') + end + end + end + end + end +end diff --git a/spec/helpers/gitlab_markdown_helper_spec.rb b/spec/helpers/gitlab_markdown_helper_spec.rb index bd2240c5997..ff0f049ce6c 100644 --- a/spec/helpers/gitlab_markdown_helper_spec.rb +++ b/spec/helpers/gitlab_markdown_helper_spec.rb @@ -316,11 +316,6 @@ describe GitlabMarkdownHelper do expected = "" expect(markdown(actual)).to match(expected) end - - it 'should allow whitelisted HTML tags from the user' do - actual = '<dl><dt>Term</dt><dd>Definition</dd></dl>' - expect(markdown(actual)).to match(actual) - end end context 'with an empty repository' do @@ -336,34 +331,6 @@ describe GitlabMarkdownHelper do end end end - - # SANITIZATION ------------------------------------------------------------ - # TODO (rspeicher): These are testing SanitizationFilter, not `markdown` - - it 'should sanitize tags that are not whitelisted' do - actual = '<textarea>no inputs allowed</textarea> <blink>no blinks</blink>' - expected = 'no inputs allowed no blinks' - expect(markdown(actual)).to match(expected) - expect(markdown(actual)).not_to match('<.textarea>') - expect(markdown(actual)).not_to match('<.blink>') - end - - it 'should allow whitelisted tag attributes from the user' do - actual = '<a class="custom">link text</a>' - expect(markdown(actual)).to match(actual) - end - - it 'should sanitize tag attributes that are not whitelisted' do - actual = '<a href="http://example.com/bar.html" foo="bar">link text</a>' - expected = '<a href="http://example.com/bar.html">link text</a>' - expect(markdown(actual)).to match(expected) - end - - it 'should sanitize javascript in attributes' do - actual = %q(<a href="javascript:alert('foo')">link text</a>) - expected = '<a>link text</a>' - expect(markdown(actual)).to match(expected) - end end describe '#render_wiki_content' do diff --git a/spec/lib/gitlab/markdown/sanitization_filter_spec.rb b/spec/lib/gitlab/markdown/sanitization_filter_spec.rb new file mode 100644 index 00000000000..ab909a68635 --- /dev/null +++ b/spec/lib/gitlab/markdown/sanitization_filter_spec.rb @@ -0,0 +1,81 @@ +require 'spec_helper' + +module Gitlab::Markdown + describe SanitizationFilter do + def filter(html, options = {}) + described_class.call(html, options) + end + + describe 'default whitelist' do + it 'sanitizes tags that are not whitelisted' do + act = %q{<textarea>no inputs</textarea> and <blink>no blinks</blink>} + exp = 'no inputs and no blinks' + expect(filter(act).to_html).to eq exp + end + + it 'sanitizes tag attributes' do + act = %q{<a href="http://example.com/bar.html" onclick="bar">Text</a>} + exp = %q{<a href="http://example.com/bar.html">Text</a>} + expect(filter(act).to_html).to eq exp + end + + it 'sanitizes javascript in attributes' do + act = %q(<a href="javascript:alert('foo')">Text</a>) + exp = '<a>Text</a>' + expect(filter(act).to_html).to eq exp + end + + it 'allows whitelisted HTML tags from the user' do + exp = act = "<dl>\n<dt>Term</dt>\n<dd>Definition</dd>\n</dl>" + expect(filter(act).to_html).to eq exp + end + end + + describe 'custom whitelist' do + it 'allows `class` attribute on any element' do + exp = act = %q{<strong class="foo">Strong</strong>} + expect(filter(act).to_html).to eq exp + end + + it 'allows `id` attribute on any element' do + exp = act = %q{<em id="foo">Emphasis</em>} + expect(filter(act).to_html).to eq exp + end + + it 'allows `style` attribute on table elements' do + html = <<-HTML.strip_heredoc + <table> + <tr><th style="text-align: center">Head</th></tr> + <tr><td style="text-align: right">Body</th></tr> + </table> + HTML + + doc = filter(html) + + expect(doc.at_css('th')['style']).to eq 'text-align: center' + expect(doc.at_css('td')['style']).to eq 'text-align: right' + end + + it 'allows `span` elements' do + exp = act = %q{<span>Hello</span>} + expect(filter(act).to_html).to eq exp + end + + it 'removes `rel` attribute from `a` elements' do + doc = filter(%q{<a href="#" rel="nofollow">Link</a>}) + + expect(doc.css('a').size).to eq 1 + expect(doc.at_css('a')['href']).to eq '#' + expect(doc.at_css('a')['rel']).to be_nil + end + + it 'removes script-like `href` attribute from `a` elements' do + html = %q{<a href="javascript:alert('Hi')">Hi</a>} + doc = filter(html) + + expect(doc.css('a').size).to eq 1 + expect(doc.at_css('a')['href']).to be_nil + end + end + end +end |