diff options
| -rw-r--r-- | doc/markdown/markdown.md | 2 | ||||
| -rw-r--r-- | lib/gitlab/markdown/sanitization_filter.rb | 19 | ||||
| -rw-r--r-- | spec/features/markdown_spec.rb | 28 | ||||
| -rw-r--r-- | spec/fixtures/markdown.md.erb | 26 | ||||
| -rw-r--r-- | spec/lib/gitlab/markdown/sanitization_filter_spec.rb | 20 | 
5 files changed, 59 insertions, 36 deletions
| diff --git a/doc/markdown/markdown.md b/doc/markdown/markdown.md index e95ddbb7578..30c29084e34 100644 --- a/doc/markdown/markdown.md +++ b/doc/markdown/markdown.md @@ -423,7 +423,7 @@ Quote break.  You can also use raw HTML in your Markdown, and it'll mostly work pretty well. -See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes.  In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements, as well as the `class`, and `id` attributes on all elements. +See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes.  In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements.  ```no-highlight  <dl> diff --git a/lib/gitlab/markdown/sanitization_filter.rb b/lib/gitlab/markdown/sanitization_filter.rb index 9a154e0b2fe..6f33155badf 100644 --- a/lib/gitlab/markdown/sanitization_filter.rb +++ b/lib/gitlab/markdown/sanitization_filter.rb @@ -10,8 +10,9 @@ module Gitlab        def whitelist          whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST -        # Allow `class` and `id` on all elements -        whitelist[:attributes][:all].push('class', 'id') +        # Allow code highlighting +        whitelist[:attributes]['pre'] = %w(class) +        whitelist[:attributes]['span'] = %w(class)          # Allow table alignment          whitelist[:attributes]['th'] = %w(style) @@ -23,6 +24,9 @@ module Gitlab          # Remove `rel` attribute from `a` elements          whitelist[:transformers].push(remove_rel) +        # Remove `class` attribute from non-highlight spans +        whitelist[:transformers].push(clean_spans) +          whitelist        end @@ -33,6 +37,17 @@ module Gitlab            end          end        end + +      def clean_spans +        lambda do |env| +          return unless env[:node_name] == 'span' +          return unless env[:node].has_attribute?('class') + +          unless has_ancestor?(env[:node], 'pre') +            env[:node].remove_attribute('class') +          end +        end +      end      end    end  end diff --git a/spec/features/markdown_spec.rb b/spec/features/markdown_spec.rb index 1746ce128e4..8f3dfc8d5a9 100644 --- a/spec/features/markdown_spec.rb +++ b/spec/features/markdown_spec.rb @@ -60,8 +60,8 @@ describe 'GitLab Markdown' do      @feat.teardown    end -  # Given a header ID, goes to that element's parent (the header), then to its -  # second sibling (the body). +  # Given a header ID, goes to that element's parent (the header itself), then +  # its next sibling element (the body).    def get_section(id)      @doc.at_css("##{id}").parent.next_element    end @@ -119,18 +119,18 @@ describe 'GitLab Markdown' do    describe 'HTML::Pipeline' do      describe 'SanitizationFilter' do        it 'uses a permissive whitelist' do -        expect(@doc).to have_selector('b#manual-b') -        expect(@doc).to have_selector('em#manual-em') -        expect(@doc).to have_selector("code#manual-code") +        expect(@doc).to have_selector('b:contains("b tag")') +        expect(@doc).to have_selector('em:contains("em tag")') +        expect(@doc).to have_selector('code:contains("code tag")')          expect(@doc).to have_selector('kbd:contains("s")')          expect(@doc).to have_selector('strike:contains(Emoji)') -        expect(@doc).to have_selector('img#manual-img') -        expect(@doc).to have_selector('br#manual-br') -        expect(@doc).to have_selector('hr#manual-hr') +        expect(@doc).to have_selector('img[src*="smile.png"]') +        expect(@doc).to have_selector('br') +        expect(@doc).to have_selector('hr')        end        it 'permits span elements' do -        expect(@doc).to have_selector('span#span-class-light.light') +        expect(@doc).to have_selector('span:contains("span tag")')        end        it 'permits table alignment' do @@ -144,13 +144,12 @@ describe 'GitLab Markdown' do        end        it 'removes `rel` attribute from links' do -        expect(@doc).to have_selector('a#a-rel-nofollow') -        expect(@doc).not_to have_selector('a#a-rel-nofollow[rel]') +        body = get_section('sanitizationfilter') +        expect(body).not_to have_selector('a[rel]')        end        it "removes `href` from `a` elements if it's fishy" do -        expect(@doc).to have_selector('a#a-href-javascript') -        expect(@doc).not_to have_selector('a#a-href-javascript[href]') +        expect(@doc).not_to have_selector('a[href*="javascript"]')        end      end @@ -228,7 +227,8 @@ describe 'GitLab Markdown' do        %w(code a kbd).each do |elem|          it "ignores links inside '#{elem}' element" do -          expect(@doc.at_css("#{elem}#autolink-#{elem}").child).to be_text +          body = get_section('autolinkfilter') +          expect(body).not_to have_selector("#{elem} a")          end        end      end diff --git a/spec/fixtures/markdown.md.erb b/spec/fixtures/markdown.md.erb index bc023ecf793..64817ec6700 100644 --- a/spec/fixtures/markdown.md.erb +++ b/spec/fixtures/markdown.md.erb @@ -54,36 +54,34 @@ After the Markdown has been turned into HTML, it gets passed through...  ### SanitizationFilter -GitLab uses <a href="http://git.io/vfW8a" class="sanitize" id="sanitize-link">HTML::Pipeline::SanitizationFilter</a> +GitLab uses <a href="http://git.io/vfW8a">HTML::Pipeline::SanitizationFilter</a>  to sanitize the generated HTML, stripping dangerous or unwanted tags.  Its default whitelist is pretty permissive. Check it: -<b id="manual-b">This text is bold</b> and <em id="manual-em">this text is emphasized</em>. +<b>b tag</b> and <em>em tag</em>. -<code id="manual-code">echo "Hello, world!"</code> +<code>code tag</code>  Press <kbd>s</kbd> to search. -<strike>Emoji</strike> Plain old images! <img -src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20" -height="20" id="manual-img" /> +<strike>Emoji</strike> Plain old images! <img src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20" height="20" />  Here comes a line break: -<br id="manual-br" /> +<br />  And a horizontal rule: -<hr id="manual-hr" /> +<hr />  As permissive as it is, we've allowed even more stuff: -<span class="light" id="span-class-light">Span elements</span> +<span>span tag</span> -<a href="#" rel="nofollow" id="a-rel-nofollow">This is a link with a defined rel attribute, which should be removed</a> +<a href="#" rel="nofollow">This is a link with a defined rel attribute, which should be removed</a> -<a href="javascript:alert('Hi')" id="a-href-javascript">This is a link trying to be sneaky. It gets its link removed entirely.</a> +<a href="javascript:alert('Hi')">This is a link trying to be sneaky. It gets its link removed entirely.</a>  ### Escaping @@ -125,9 +123,9 @@ These are all plain text that should get turned into links:  But it shouldn't autolink text inside certain tags: -- <code id="autolink-code">http://about.gitlab.com/</code> -- <a id="autolink-a">http://about.gitlab.com/</a> -- <kbd id="autolink-kbd">http://about.gitlab.com/</kbd> +- <code>http://about.gitlab.com/</code> +- <a>http://about.gitlab.com/</a> +- <kbd>http://about.gitlab.com/</kbd>  ### Reference Filters (e.g., #<%= issue.iid %>) diff --git a/spec/lib/gitlab/markdown/sanitization_filter_spec.rb b/spec/lib/gitlab/markdown/sanitization_filter_spec.rb index ab909a68635..4a1aa766149 100644 --- a/spec/lib/gitlab/markdown/sanitization_filter_spec.rb +++ b/spec/lib/gitlab/markdown/sanitization_filter_spec.rb @@ -29,17 +29,27 @@ module Gitlab::Markdown          exp = act = "<dl>\n<dt>Term</dt>\n<dd>Definition</dd>\n</dl>"          expect(filter(act).to_html).to eq exp        end + +      it 'sanitizes `class` attribute on any element' do +        act = %q{<strong class="foo">Strong</strong>} +        expect(filter(act).to_html).to eq %q{<strong>Strong</strong>} +      end + +      it 'sanitizes `id` attribute on any element' do +        act = %q{<em id="foo">Emphasis</em>} +        expect(filter(act).to_html).to eq %q{<em>Emphasis</em>} +      end      end      describe 'custom whitelist' do -      it 'allows `class` attribute on any element' do -        exp = act = %q{<strong class="foo">Strong</strong>} +      it 'allows syntax highlighting' do +        exp = act = %q{<pre class="code highlight white c"><code><span class="k">def</span></code></pre>}          expect(filter(act).to_html).to eq exp        end -      it 'allows `id` attribute on any element' do -        exp = act = %q{<em id="foo">Emphasis</em>} -        expect(filter(act).to_html).to eq exp +      it 'sanitizes `class` attribute from non-highlight spans' do +        act = %q{<span class="k">def</span>} +        expect(filter(act).to_html).to eq %q{<span>def</span>}        end        it 'allows `style` attribute on table elements' do | 
