From 5fd6497a76292c31c16f53ee86fbe8ca2307746d Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Wed, 27 May 2015 20:26:44 +0000 Subject: Merge branch 'rs-dont-follow-me' into 'master' Add ExternalLinkFilter to Markdown pipeline Forces a `rel="nofollow"` attribute on all external links. Addresses internal https://dev.gitlab.org/gitlab/gitlabhq/issues/2314#note_46525 See merge request !727 --- lib/gitlab/markdown.rb | 2 ++ lib/gitlab/markdown/external_link_filter.rb | 33 ++++++++++++++++++++++ spec/features/markdown_spec.rb | 14 ++++++++- spec/fixtures/markdown.md.erb | 9 +++++- .../gitlab/markdown/external_link_filter_spec.rb | 33 ++++++++++++++++++++++ 5 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 lib/gitlab/markdown/external_link_filter.rb create mode 100644 spec/lib/gitlab/markdown/external_link_filter_spec.rb diff --git a/lib/gitlab/markdown.rb b/lib/gitlab/markdown.rb index c0fb22e7f36..5db1566f55d 100644 --- a/lib/gitlab/markdown.rb +++ b/lib/gitlab/markdown.rb @@ -11,6 +11,7 @@ module Gitlab autoload :CommitReferenceFilter, 'gitlab/markdown/commit_reference_filter' autoload :EmojiFilter, 'gitlab/markdown/emoji_filter' autoload :ExternalIssueReferenceFilter, 'gitlab/markdown/external_issue_reference_filter' + autoload :ExternalLinkFilter, 'gitlab/markdown/external_link_filter' autoload :IssueReferenceFilter, 'gitlab/markdown/issue_reference_filter' autoload :LabelReferenceFilter, 'gitlab/markdown/label_reference_filter' autoload :MergeRequestReferenceFilter, 'gitlab/markdown/merge_request_reference_filter' @@ -103,6 +104,7 @@ module Gitlab Gitlab::Markdown::EmojiFilter, Gitlab::Markdown::TableOfContentsFilter, Gitlab::Markdown::AutolinkFilter, + Gitlab::Markdown::ExternalLinkFilter, Gitlab::Markdown::UserReferenceFilter, Gitlab::Markdown::IssueReferenceFilter, diff --git a/lib/gitlab/markdown/external_link_filter.rb b/lib/gitlab/markdown/external_link_filter.rb new file mode 100644 index 00000000000..c539e0fb823 --- /dev/null +++ b/lib/gitlab/markdown/external_link_filter.rb @@ -0,0 +1,33 @@ +require 'html/pipeline/filter' + +module Gitlab + module Markdown + # HTML Filter to add a `rel="nofollow"` attribute to external links + # + class ExternalLinkFilter < HTML::Pipeline::Filter + def call + doc.search('a').each do |node| + next unless node.has_attribute?('href') + + link = node.attribute('href').value + + # Skip non-HTTP(S) links + next unless link.start_with?('http') + + # Skip internal links + next if link.start_with?(internal_url) + + node.set_attribute('rel', 'nofollow') + end + + doc + end + + private + + def internal_url + @internal_url ||= Gitlab.config.gitlab.url + end + end + end +end diff --git a/spec/features/markdown_spec.rb b/spec/features/markdown_spec.rb index 8f3dfc8d5a9..c22928505b1 100644 --- a/spec/features/markdown_spec.rb +++ b/spec/features/markdown_spec.rb @@ -145,7 +145,7 @@ describe 'GitLab Markdown' do it 'removes `rel` attribute from links' do body = get_section('sanitizationfilter') - expect(body).not_to have_selector('a[rel]') + expect(body).not_to have_selector('a[rel="bookmark"]') end it "removes `href` from `a` elements if it's fishy" do @@ -233,6 +233,18 @@ describe 'GitLab Markdown' do end end + describe 'ExternalLinkFilter' do + let(:links) { get_section('externallinkfilter').next_element } + + it 'adds nofollow to external link' do + expect(links.css('a').first.to_html).to match 'nofollow' + end + + it 'ignores internal link' do + expect(links.css('a').last.to_html).not_to match 'nofollow' + end + end + describe 'ReferenceFilter' do it 'handles references in headers' do header = @doc.at_css('#reference-filters-eg-1').parent diff --git a/spec/fixtures/markdown.md.erb b/spec/fixtures/markdown.md.erb index 64817ec6700..9e02289103d 100644 --- a/spec/fixtures/markdown.md.erb +++ b/spec/fixtures/markdown.md.erb @@ -79,7 +79,7 @@ As permissive as it is, we've allowed even more stuff: span tag -This is a link with a defined rel attribute, which should be removed +This is a link with a defined rel attribute, which should be removed This is a link trying to be sneaky. It gets its link removed entirely. @@ -127,6 +127,13 @@ But it shouldn't autolink text inside certain tags: - http://about.gitlab.com/ - http://about.gitlab.com/ +### ExternalLinkFilter + +External links get a `rel="nofollow"` attribute: + +- [Google](https://google.com/) +- [GitLab Root](<%= Gitlab.config.gitlab.url %>) + ### Reference Filters (e.g., #<%= issue.iid %>) References should be parseable even inside _!<%= merge_request.iid %>_ emphasis. diff --git a/spec/lib/gitlab/markdown/external_link_filter_spec.rb b/spec/lib/gitlab/markdown/external_link_filter_spec.rb new file mode 100644 index 00000000000..c2ff4f80a42 --- /dev/null +++ b/spec/lib/gitlab/markdown/external_link_filter_spec.rb @@ -0,0 +1,33 @@ +require 'spec_helper' + +module Gitlab::Markdown + describe ExternalLinkFilter do + def filter(html, options = {}) + described_class.call(html, options) + end + + it 'ignores elements without an href attribute' do + exp = act = %q(Ignore Me) + expect(filter(act).to_html).to eq exp + end + + it 'ignores non-HTTP(S) links' do + exp = act = %q(IRC) + expect(filter(act).to_html).to eq exp + end + + it 'skips internal links' do + internal = Gitlab.config.gitlab.url + exp = act = %Q(Login) + expect(filter(act).to_html).to eq exp + end + + it 'adds rel="nofollow" to external links' do + act = %q(Google) + doc = filter(act) + + expect(doc.at_css('a')).to have_attribute('rel') + expect(doc.at_css('a')['rel']).to eq 'nofollow' + end + end +end -- cgit v1.2.1