summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Speicher <rspeicher@gmail.com>2015-10-01 23:40:29 -0400
committerRobert Speicher <rspeicher@gmail.com>2015-10-01 23:40:29 -0400
commit16f8ca566b8637dc8092a6b630c23a82a905b437 (patch)
tree18aa607e127d8a62ed2b99208504674274d2b5e8
parentd7eceafb27225dee1b62181c21ae7cc240d984bf (diff)
downloadgitlab-ce-rs-dev-issue-2613.tar.gz
Add custom protocol whitelisting to SanitizationFilterrs-dev-issue-2613
Addresses internal https://dev.gitlab.org/gitlab/gitlabhq/issues/2613
-rw-r--r--lib/gitlab/markdown/sanitization_filter.rb19
-rw-r--r--spec/lib/gitlab/markdown/sanitization_filter_spec.rb101
2 files changed, 110 insertions, 10 deletions
diff --git a/lib/gitlab/markdown/sanitization_filter.rb b/lib/gitlab/markdown/sanitization_filter.rb
index e368de7d848..ffb9dc33b64 100644
--- a/lib/gitlab/markdown/sanitization_filter.rb
+++ b/lib/gitlab/markdown/sanitization_filter.rb
@@ -48,6 +48,12 @@ module Gitlab
# Allow span elements
whitelist[:elements].push('span')
+ # Allow any protocol in `a` elements...
+ whitelist[:protocols].delete('a')
+
+ # ...but then remove links with the `javascript` protocol
+ whitelist[:transformers].push(remove_javascript_links)
+
# Remove `rel` attribute from `a` elements
whitelist[:transformers].push(remove_rel)
@@ -57,6 +63,19 @@ module Gitlab
whitelist
end
+ def remove_javascript_links
+ lambda do |env|
+ node = env[:node]
+
+ return unless node.name == 'a'
+ return unless node.has_attribute?('href')
+
+ if node['href'].start_with?('javascript', ':javascript')
+ node.remove_attribute('href')
+ end
+ end
+ end
+
def remove_rel
lambda do |env|
if env[:node_name] == 'a'
diff --git a/spec/lib/gitlab/markdown/sanitization_filter_spec.rb b/spec/lib/gitlab/markdown/sanitization_filter_spec.rb
index e50c82d0b3c..27cd00e8054 100644
--- a/spec/lib/gitlab/markdown/sanitization_filter_spec.rb
+++ b/spec/lib/gitlab/markdown/sanitization_filter_spec.rb
@@ -44,7 +44,7 @@ module Gitlab::Markdown
instance = described_class.new('Foo')
3.times { instance.whitelist }
- expect(instance.whitelist[:transformers].size).to eq 4
+ expect(instance.whitelist[:transformers].size).to eq 5
end
it 'allows syntax highlighting' do
@@ -77,19 +77,100 @@ module Gitlab::Markdown
end
it 'removes `rel` attribute from `a` elements' do
- doc = filter(%q{<a href="#" rel="nofollow">Link</a>})
+ act = %q{<a href="#" rel="nofollow">Link</a>}
+ exp = %q{<a href="#">Link</a>}
- expect(doc.css('a').size).to eq 1
- expect(doc.at_css('a')['href']).to eq '#'
- expect(doc.at_css('a')['rel']).to be_nil
+ expect(filter(act).to_html).to eq exp
end
- it 'removes script-like `href` attribute from `a` elements' do
- html = %q{<a href="javascript:alert('Hi')">Hi</a>}
- doc = filter(html)
+ # Adapted from the Sanitize test suite: http://git.io/vczrM
+ protocols = {
+ 'protocol-based JS injection: simple, no spaces' => {
+ input: '<a href="javascript:alert(\'XSS\');">foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: simple, spaces before' => {
+ input: '<a href="javascript :alert(\'XSS\');">foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: simple, spaces after' => {
+ input: '<a href="javascript: alert(\'XSS\');">foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: simple, spaces before and after' => {
+ input: '<a href="javascript : alert(\'XSS\');">foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: preceding colon' => {
+ input: '<a href=":javascript:alert(\'XSS\');">foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: UTF-8 encoding' => {
+ input: '<a href="javascript&#58;">foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: long UTF-8 encoding' => {
+ input: '<a href="javascript&#0058;">foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: long UTF-8 encoding without semicolons' => {
+ input: '<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: hex encoding' => {
+ input: '<a href="javascript&#x3A;">foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: long hex encoding' => {
+ input: '<a href="javascript&#x003A;">foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: hex encoding without semicolons' => {
+ input: '<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>',
+ output: '<a>foo</a>'
+ },
+
+ 'protocol-based JS injection: null char' => {
+ input: "<a href=java\0script:alert(\"XSS\")>foo</a>",
+ output: '<a href="java"></a>'
+ },
+
+ 'protocol-based JS injection: spaces and entities' => {
+ input: '<a href=" &#14; javascript:alert(\'XSS\');">foo</a>',
+ output: '<a href="">foo</a>'
+ },
+ }
+
+ protocols.each do |name, data|
+ it "handles #{name}" do
+ doc = filter(data[:input])
+
+ expect(doc.to_html).to eq data[:output]
+ end
+ end
+
+ it 'allows non-standard anchor schemes' do
+ exp = %q{<a href="irc://irc.freenode.net/git">IRC</a>}
+ act = filter(exp)
+
+ expect(act.to_html).to eq exp
+ end
+
+ it 'allows relative links' do
+ exp = %q{<a href="foo/bar.md">foo/bar.md</a>}
+ act = filter(exp)
- expect(doc.css('a').size).to eq 1
- expect(doc.at_css('a')['href']).to be_nil
+ expect(act.to_html).to eq exp
end
end