diff options
author | Bob Van Landuyt <bob@gitlab.com> | 2017-05-01 08:46:24 +0200 |
---|---|---|
committer | Bob Van Landuyt <bob@gitlab.com> | 2017-05-02 20:55:46 +0200 |
commit | 38d2d166dee113a6e3bebe40ab2e135a26696c1d (patch) | |
tree | 5631f43b9e7d0c6f912073f0110d35d1fd749343 | |
parent | 8d9ab6296d66eff06cf88556fef9f33fae1e058c (diff) | |
download | gitlab-ce-38d2d166dee113a6e3bebe40ab2e135a26696c1d.tar.gz |
Validate URLs using URI
And add the correct `rel` attributes to rendered HTML for markdown
-rw-r--r-- | changelogs/unreleased/bvl-validate-urls-in-markdown-using-uri.yml | 4 | ||||
-rw-r--r-- | lib/banzai/filter/external_link_filter.rb | 36 | ||||
-rw-r--r-- | spec/lib/banzai/filter/external_link_filter_spec.rb | 85 | ||||
-rw-r--r-- | spec/lib/gitlab/asciidoc_spec.rb | 2 | ||||
-rw-r--r-- | spec/lib/gitlab/other_markup_spec.rb | 2 |
5 files changed, 72 insertions, 57 deletions
diff --git a/changelogs/unreleased/bvl-validate-urls-in-markdown-using-uri.yml b/changelogs/unreleased/bvl-validate-urls-in-markdown-using-uri.yml new file mode 100644 index 00000000000..03c4e531d73 --- /dev/null +++ b/changelogs/unreleased/bvl-validate-urls-in-markdown-using-uri.yml @@ -0,0 +1,4 @@ +--- +title: Validate URLs in markdown using URI to detect the host correctly +merge_request: +author: diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index d67d466bce8..7d15a0f6d44 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -2,16 +2,17 @@ module Banzai module Filter # HTML Filter to modify the attributes of external links class ExternalLinkFilter < HTML::Pipeline::Filter + SCHEMES = ['http', 'https', nil].freeze + def call links.each do |node| - href = href_to_lowercase_scheme(node["href"].to_s) + uri = uri(node['href'].to_s) + next unless uri - unless node["href"].to_s == href - node.set_attribute('href', href) - end + node.set_attribute('href', uri.to_s) - if href =~ %r{\A(https?:)?//[^/]} && external_url?(href) - node.set_attribute('rel', 'nofollow noreferrer') + if SCHEMES.include?(uri.scheme) && external_url?(uri) + node.set_attribute('rel', 'nofollow noreferrer noopener') node.set_attribute('target', '_blank') end end @@ -21,27 +22,26 @@ module Banzai private + def uri(href) + URI.parse(href) + rescue URI::InvalidURIError + nil + end + def links query = 'descendant-or-self::a[@href and not(@href = "")]' doc.xpath(query) end - def href_to_lowercase_scheme(href) - scheme_match = href.match(/\A(\w+):\/\//) - - if scheme_match - scheme_match.to_s.downcase + scheme_match.post_match - else - href - end - end + def external_url?(uri) + # Relative URLs miss a hostname + return false unless uri.hostname - def external_url?(url) - !url.start_with?(internal_url) + uri.hostname != internal_url.hostname end def internal_url - @internal_url ||= Gitlab.config.gitlab.url + @internal_url ||= URI.parse(Gitlab.config.gitlab.url) end end end diff --git a/spec/lib/banzai/filter/external_link_filter_spec.rb b/spec/lib/banzai/filter/external_link_filter_spec.rb index d9e4525cb28..6f6c215be87 100644 --- a/spec/lib/banzai/filter/external_link_filter_spec.rb +++ b/spec/lib/banzai/filter/external_link_filter_spec.rb @@ -1,5 +1,22 @@ require 'spec_helper' +shared_examples 'an external link with rel attribute' do + it 'adds rel="nofollow" to external links' do + expect(doc.at_css('a')).to have_attribute('rel') + expect(doc.at_css('a')['rel']).to include 'nofollow' + end + + it 'adds rel="noreferrer" to external links' do + expect(doc.at_css('a')).to have_attribute('rel') + expect(doc.at_css('a')['rel']).to include 'noreferrer' + end + + it 'adds rel="noopener" to external links' do + expect(doc.at_css('a')).to have_attribute('rel') + expect(doc.at_css('a')['rel']).to include 'noopener' + end +end + describe Banzai::Filter::ExternalLinkFilter, lib: true do include FilterSpecHelper @@ -22,49 +39,51 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do context 'for root links on document' do let(:doc) { filter %q(<a href="https://google.com/">Google</a>) } - it 'adds rel="nofollow" to external links' do - expect(doc.at_css('a')).to have_attribute('rel') - expect(doc.at_css('a')['rel']).to include 'nofollow' - end - - it 'adds rel="noreferrer" to external links' do - expect(doc.at_css('a')).to have_attribute('rel') - expect(doc.at_css('a')['rel']).to include 'noreferrer' - end + it_behaves_like 'an external link with rel attribute' end context 'for nested links on document' do let(:doc) { filter %q(<p><a href="https://google.com/">Google</a></p>) } - it 'adds rel="nofollow" to external links' do - expect(doc.at_css('a')).to have_attribute('rel') - expect(doc.at_css('a')['rel']).to include 'nofollow' + it_behaves_like 'an external link with rel attribute' + end + + context 'for invalid urls' do + it 'skips broken hrefs' do + doc = filter %q(<p><a href="don't crash on broken urls">Google</a></p>) + expected = %q(<p><a href="don't%20crash%20on%20broken%20urls">Google</a></p>) + + expect(doc.to_html).to eq(expected) end + end + + context 'for links with a username' do + context 'with a valid username' do + let(:doc) { filter %q(<a href="https://user@google.com/">Google</a>) } - it 'adds rel="noreferrer" to external links' do - expect(doc.at_css('a')).to have_attribute('rel') - expect(doc.at_css('a')['rel']).to include 'noreferrer' + it_behaves_like 'an external link with rel attribute' + end + + context 'with an impersonated username' do + let(:internal) { Gitlab.config.gitlab.url } + + let(:doc) { filter %Q(<a href="https://#{internal}@example.com" target="_blank">Reverse Tabnabbing</a>) } + + it_behaves_like 'an external link with rel attribute' end end context 'for non-lowercase scheme links' do - let(:doc_with_http) { filter %q(<p><a href="httP://google.com/">Google</a></p>) } - let(:doc_with_https) { filter %q(<p><a href="hTTpS://google.com/">Google</a></p>) } - - it 'adds rel="nofollow" to external links' do - expect(doc_with_http.at_css('a')).to have_attribute('rel') - expect(doc_with_https.at_css('a')).to have_attribute('rel') + context 'with http' do + let(:doc) { filter %q(<p><a href="httP://google.com/">Google</a></p>) } - expect(doc_with_http.at_css('a')['rel']).to include 'nofollow' - expect(doc_with_https.at_css('a')['rel']).to include 'nofollow' + it_behaves_like 'an external link with rel attribute' end - it 'adds rel="noreferrer" to external links' do - expect(doc_with_http.at_css('a')).to have_attribute('rel') - expect(doc_with_https.at_css('a')).to have_attribute('rel') + context 'with https' do + let(:doc) { filter %q(<p><a href="hTTpS://google.com/">Google</a></p>) } - expect(doc_with_http.at_css('a')['rel']).to include 'noreferrer' - expect(doc_with_https.at_css('a')['rel']).to include 'noreferrer' + it_behaves_like 'an external link with rel attribute' end it 'skips internal links' do @@ -84,14 +103,6 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do context 'for protocol-relative links' do let(:doc) { filter %q(<p><a href="//google.com/">Google</a></p>) } - it 'adds rel="nofollow" to external links' do - expect(doc.at_css('a')).to have_attribute('rel') - expect(doc.at_css('a')['rel']).to include 'nofollow' - end - - it 'adds rel="noreferrer" to external links' do - expect(doc.at_css('a')).to have_attribute('rel') - expect(doc.at_css('a')['rel']).to include 'noreferrer' - end + it_behaves_like 'an external link with rel attribute' end end diff --git a/spec/lib/gitlab/asciidoc_spec.rb b/spec/lib/gitlab/asciidoc_spec.rb index 0f069cc306e..59d5440f408 100644 --- a/spec/lib/gitlab/asciidoc_spec.rb +++ b/spec/lib/gitlab/asciidoc_spec.rb @@ -69,7 +69,7 @@ module Gitlab it 'adds the `rel` attribute to the link' do output = render('link:https://google.com[Google]', context) - expect(output).to include('rel="nofollow noreferrer"') + expect(output).to include('rel="nofollow noreferrer noopener"') end end end diff --git a/spec/lib/gitlab/other_markup_spec.rb b/spec/lib/gitlab/other_markup_spec.rb index d6babd01971..cc9ebb8000c 100644 --- a/spec/lib/gitlab/other_markup_spec.rb +++ b/spec/lib/gitlab/other_markup_spec.rb @@ -17,7 +17,7 @@ describe Gitlab::OtherMarkup, lib: true do context = {} output = render('file.rdoc', '{Google}[https://google.com]', context) - expect(output).to include('rel="nofollow noreferrer"') + expect(output).to include('rel="nofollow noreferrer noopener"') end end |