summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bob Van Landuyt <bob@gitlab.com> 2017-05-01 08:46:24 +0200
committer Bob Van Landuyt <bob@gitlab.com> 2017-05-02 20:55:46 +0200
commit 38d2d166dee113a6e3bebe40ab2e135a26696c1d (patch)
tree 5631f43b9e7d0c6f912073f0110d35d1fd749343
parent 8d9ab6296d66eff06cf88556fef9f33fae1e058c (diff)
download gitlab-ce-38d2d166dee113a6e3bebe40ab2e135a26696c1d.tar.gz
Validate URLs using URI
And add the correct `rel` attributes to rendered HTML for markdown
-rw-r--r-- changelogs/unreleased/bvl-validate-urls-in-markdown-using-uri.yml 4
-rw-r--r-- lib/banzai/filter/external_link_filter.rb 36
-rw-r--r-- spec/lib/banzai/filter/external_link_filter_spec.rb 85
-rw-r--r-- spec/lib/gitlab/asciidoc_spec.rb 2
-rw-r--r-- spec/lib/gitlab/other_markup_spec.rb 2
5 files changed, 72 insertions, 57 deletions
diff --git a/changelogs/unreleased/bvl-validate-urls-in-markdown-using-uri.yml b/changelogs/unreleased/bvl-validate-urls-in-markdown-using-uri.yml
new file mode 100644
index 00000000000..03c4e531d73
--- /dev/null
+++ b/changelogs/unreleased/bvl-validate-urls-in-markdown-using-uri.yml
@@ -0,0 +1,4 @@
+---
+title: Validate URLs in markdown using URI to detect the host correctly
+merge_request:
+author:
diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb
index d67d466bce8..7d15a0f6d44 100644
--- a/lib/banzai/filter/external_link_filter.rb
+++ b/lib/banzai/filter/external_link_filter.rb
@@ -2,16 +2,17 @@ module Banzai
module Filter
# HTML Filter to modify the attributes of external links
class ExternalLinkFilter < HTML::Pipeline::Filter
+ SCHEMES = ['http', 'https', nil].freeze
+
def call
links.each do |node|
- href = href_to_lowercase_scheme(node["href"].to_s)
+ uri = uri(node['href'].to_s)
+ next unless uri
- unless node["href"].to_s == href
- node.set_attribute('href', href)
- end
+ node.set_attribute('href', uri.to_s)
- if href =~ %r{\A(https?:)?//[^/]} && external_url?(href)
- node.set_attribute('rel', 'nofollow noreferrer')
+ if SCHEMES.include?(uri.scheme) && external_url?(uri)
+ node.set_attribute('rel', 'nofollow noreferrer noopener')
node.set_attribute('target', '_blank')
end
end
@@ -21,27 +22,26 @@ module Banzai
private
+ def uri(href)
+ URI.parse(href)
+ rescue URI::InvalidURIError
+ nil
+ end
+
def links
query = 'descendant-or-self::a[@href and not(@href = "")]'
doc.xpath(query)
end
- def href_to_lowercase_scheme(href)
- scheme_match = href.match(/\A(\w+):\/\//)
-
- if scheme_match
- scheme_match.to_s.downcase + scheme_match.post_match
- else
- href
- end
- end
+ def external_url?(uri)
+ # Relative URLs miss a hostname
+ return false unless uri.hostname
- def external_url?(url)
- !url.start_with?(internal_url)
+ uri.hostname != internal_url.hostname
end
def internal_url
- @internal_url ||= Gitlab.config.gitlab.url
+ @internal_url ||= URI.parse(Gitlab.config.gitlab.url)
end
end
end
diff --git a/spec/lib/banzai/filter/external_link_filter_spec.rb b/spec/lib/banzai/filter/external_link_filter_spec.rb
index d9e4525cb28..6f6c215be87 100644
--- a/spec/lib/banzai/filter/external_link_filter_spec.rb
+++ b/spec/lib/banzai/filter/external_link_filter_spec.rb
@@ -1,5 +1,22 @@
require 'spec_helper'
+shared_examples 'an external link with rel attribute' do
+ it 'adds rel="nofollow" to external links' do
+ expect(doc.at_css('a')).to have_attribute('rel')
+ expect(doc.at_css('a')['rel']).to include 'nofollow'
+ end
+
+ it 'adds rel="noreferrer" to external links' do
+ expect(doc.at_css('a')).to have_attribute('rel')
+ expect(doc.at_css('a')['rel']).to include 'noreferrer'
+ end
+
+ it 'adds rel="noopener" to external links' do
+ expect(doc.at_css('a')).to have_attribute('rel')
+ expect(doc.at_css('a')['rel']).to include 'noopener'
+ end
+end
+
describe Banzai::Filter::ExternalLinkFilter, lib: true do
include FilterSpecHelper
@@ -22,49 +39,51 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do
context 'for root links on document' do
let(:doc) { filter %q(<a href="https://google.com/">Google</a>) }
- it 'adds rel="nofollow" to external links' do
- expect(doc.at_css('a')).to have_attribute('rel')
- expect(doc.at_css('a')['rel']).to include 'nofollow'
- end
-
- it 'adds rel="noreferrer" to external links' do
- expect(doc.at_css('a')).to have_attribute('rel')
- expect(doc.at_css('a')['rel']).to include 'noreferrer'
- end
+ it_behaves_like 'an external link with rel attribute'
end
context 'for nested links on document' do
let(:doc) { filter %q(<p><a href="https://google.com/">Google</a></p>) }
- it 'adds rel="nofollow" to external links' do
- expect(doc.at_css('a')).to have_attribute('rel')
- expect(doc.at_css('a')['rel']).to include 'nofollow'
+ it_behaves_like 'an external link with rel attribute'
+ end
+
+ context 'for invalid urls' do
+ it 'skips broken hrefs' do
+ doc = filter %q(<p><a href="don't crash on broken urls">Google</a></p>)
+ expected = %q(<p><a href="don't%20crash%20on%20broken%20urls">Google</a></p>)
+
+ expect(doc.to_html).to eq(expected)
end
+ end
+
+ context 'for links with a username' do
+ context 'with a valid username' do
+ let(:doc) { filter %q(<a href="https://user@google.com/">Google</a>) }
- it 'adds rel="noreferrer" to external links' do
- expect(doc.at_css('a')).to have_attribute('rel')
- expect(doc.at_css('a')['rel']).to include 'noreferrer'
+ it_behaves_like 'an external link with rel attribute'
+ end
+
+ context 'with an impersonated username' do
+ let(:internal) { Gitlab.config.gitlab.url }
+
+ let(:doc) { filter %Q(<a href="https://#{internal}@example.com" target="_blank">Reverse Tabnabbing</a>) }
+
+ it_behaves_like 'an external link with rel attribute'
end
end
context 'for non-lowercase scheme links' do
- let(:doc_with_http) { filter %q(<p><a href="httP://google.com/">Google</a></p>) }
- let(:doc_with_https) { filter %q(<p><a href="hTTpS://google.com/">Google</a></p>) }
-
- it 'adds rel="nofollow" to external links' do
- expect(doc_with_http.at_css('a')).to have_attribute('rel')
- expect(doc_with_https.at_css('a')).to have_attribute('rel')
+ context 'with http' do
+ let(:doc) { filter %q(<p><a href="httP://google.com/">Google</a></p>) }
- expect(doc_with_http.at_css('a')['rel']).to include 'nofollow'
- expect(doc_with_https.at_css('a')['rel']).to include 'nofollow'
+ it_behaves_like 'an external link with rel attribute'
end
- it 'adds rel="noreferrer" to external links' do
- expect(doc_with_http.at_css('a')).to have_attribute('rel')
- expect(doc_with_https.at_css('a')).to have_attribute('rel')
+ context 'with https' do
+ let(:doc) { filter %q(<p><a href="hTTpS://google.com/">Google</a></p>) }
- expect(doc_with_http.at_css('a')['rel']).to include 'noreferrer'
- expect(doc_with_https.at_css('a')['rel']).to include 'noreferrer'
+ it_behaves_like 'an external link with rel attribute'
end
it 'skips internal links' do
@@ -84,14 +103,6 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do
context 'for protocol-relative links' do
let(:doc) { filter %q(<p><a href="//google.com/">Google</a></p>) }
- it 'adds rel="nofollow" to external links' do
- expect(doc.at_css('a')).to have_attribute('rel')
- expect(doc.at_css('a')['rel']).to include 'nofollow'
- end
-
- it 'adds rel="noreferrer" to external links' do
- expect(doc.at_css('a')).to have_attribute('rel')
- expect(doc.at_css('a')['rel']).to include 'noreferrer'
- end
+ it_behaves_like 'an external link with rel attribute'
end
end
diff --git a/spec/lib/gitlab/asciidoc_spec.rb b/spec/lib/gitlab/asciidoc_spec.rb
index 0f069cc306e..59d5440f408 100644
--- a/spec/lib/gitlab/asciidoc_spec.rb
+++ b/spec/lib/gitlab/asciidoc_spec.rb
@@ -69,7 +69,7 @@ module Gitlab
it 'adds the `rel` attribute to the link' do
output = render('link:https://google.com[Google]', context)
- expect(output).to include('rel="nofollow noreferrer"')
+ expect(output).to include('rel="nofollow noreferrer noopener"')
end
end
end
diff --git a/spec/lib/gitlab/other_markup_spec.rb b/spec/lib/gitlab/other_markup_spec.rb
index d6babd01971..cc9ebb8000c 100644
--- a/spec/lib/gitlab/other_markup_spec.rb
+++ b/spec/lib/gitlab/other_markup_spec.rb
@@ -17,7 +17,7 @@ describe Gitlab::OtherMarkup, lib: true do
context = {}
output = render('file.rdoc', '{Google}[https://google.com]', context)
- expect(output).to include('rel="nofollow noreferrer"')
+ expect(output).to include('rel="nofollow noreferrer noopener"')
end
end