diff options
-rw-r--r-- | changelogs/unreleased/security-2769-idn-homograph-attack.yml | 5 | ||||
-rw-r--r-- | lib/banzai/filter/autolink_filter.rb | 13 | ||||
-rw-r--r-- | lib/banzai/filter/external_link_filter.rb | 85 | ||||
-rw-r--r-- | lib/banzai/pipeline/email_pipeline.rb | 1 | ||||
-rw-r--r-- | spec/lib/banzai/filter/autolink_filter_spec.rb | 16 | ||||
-rw-r--r-- | spec/lib/banzai/filter/external_link_filter_spec.rb | 65 | ||||
-rw-r--r-- | spec/lib/banzai/pipeline/email_pipeline_spec.rb | 14 | ||||
-rw-r--r-- | spec/lib/banzai/pipeline/full_pipeline_spec.rb | 38 |
8 files changed, 226 insertions, 11 deletions
diff --git a/changelogs/unreleased/security-2769-idn-homograph-attack.yml b/changelogs/unreleased/security-2769-idn-homograph-attack.yml new file mode 100644 index 00000000000..a014b522c96 --- /dev/null +++ b/changelogs/unreleased/security-2769-idn-homograph-attack.yml @@ -0,0 +1,5 @@ +--- +title: Make potentially malicious links more visible in the UI and scrub RTLO chars from links +merge_request: 2770 +author: +type: security diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index deda4b1872e..f3061bad4ff 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -8,6 +8,10 @@ module Banzai # # Based on HTML::Pipeline::AutolinkFilter # + # Note that our CommonMark parser, `commonmarker` (using the autolink extension) + # handles standard autolinking, like http/https. We detect additional + # schemes (smb, rdar, etc). + # # Context options: # :autolink - Boolean, skips all processing done by this filter when false # :link_attr - Hash of attributes for the generated links @@ -107,10 +111,13 @@ module Banzai end end - # match has come from node.to_html above, so we know it's encoded - # correctly. + # Since this came from a Text node, make sure the new href is encoded. + # `commonmarker` percent encodes the domains of links it handles, so + # do the same (instead of using `normalized_encode`). + href_safe = Addressable::URI.encode(match).html_safe + html_safe_match = match.html_safe - options = link_options.merge(href: html_safe_match) + options = link_options.merge(href: href_safe) content_tag(:a, html_safe_match, options) + dropped end diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index 4f60b6f84c6..61ee3eac216 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -4,17 +4,29 @@ module Banzai module Filter # HTML Filter to modify the attributes of external links class ExternalLinkFilter < HTML::Pipeline::Filter - SCHEMES = ['http', 'https', nil].freeze + SCHEMES = ['http', 'https', nil].freeze + RTLO = "\u202E".freeze + ENCODED_RTLO = '%E2%80%AE'.freeze def call links.each do |node| - uri = uri(node['href'].to_s) - - node.set_attribute('href', uri.to_s) if uri + # URI.parse does stricter checking on the url than Addressable, + # such as on `mailto:` links. Since we've been using it, do an + # initial parse for validity and then use Addressable + # for IDN support, etc + uri = uri_strict(node['href'].to_s) + if uri + node.set_attribute('href', uri.to_s) + addressable_uri = addressable_uri(node['href']) + else + addressable_uri = nil + end - if SCHEMES.include?(uri&.scheme) && !internal_url?(uri) - node.set_attribute('rel', 'nofollow noreferrer noopener') - node.set_attribute('target', '_blank') + unless internal_url?(addressable_uri) + punycode_autolink_node!(addressable_uri, node) + sanitize_link_text!(node) + add_malicious_tooltip!(addressable_uri, node) + add_nofollow!(addressable_uri, node) end end @@ -23,12 +35,18 @@ module Banzai private - def uri(href) + def uri_strict(href) URI.parse(href) rescue URI::Error nil end + def addressable_uri(href) + Addressable::URI.parse(href) + rescue Addressable::URI::InvalidURIError + nil + end + def links query = 'descendant-or-self::a[@href and not(@href = "")]' doc.xpath(query) @@ -45,6 +63,57 @@ module Banzai def internal_url @internal_url ||= URI.parse(Gitlab.config.gitlab.url) end + + # Only replace an autolink with an IDN with it's punycode + # version if we need emailable links. Otherwise let it + # be shown normally and the tooltips will show the + # punycode version. + def punycode_autolink_node!(uri, node) + return unless uri + return unless context[:emailable_links] + + unencoded_uri_str = Addressable::URI.unencode(node['href']) + + if unencoded_uri_str == node.content && idn?(uri) + node.content = uri.normalize + end + end + + # escape any right-to-left (RTLO) characters in link text + def sanitize_link_text!(node) + node.inner_html = node.inner_html.gsub(RTLO, ENCODED_RTLO) + end + + # If the domain is an international domain name (IDN), + # let's expose with a tooltip in case it's intended + # to be malicious. This is particularly useful for links + # where the link text is not the same as the actual link. + # We will continue to show the unicode version of the domain + # in autolinked link text, which could contain emojis, etc. + # + # Also show the tooltip if the url contains the RTLO character, + # as this is an indicator of a malicious link + def add_malicious_tooltip!(uri, node) + if idn?(uri) || has_encoded_rtlo?(uri) + node.add_class('has-tooltip') + node.set_attribute('title', uri.normalize) + end + end + + def add_nofollow!(uri, node) + if SCHEMES.include?(uri&.scheme) + node.set_attribute('rel', 'nofollow noreferrer noopener') + node.set_attribute('target', '_blank') + end + end + + def idn?(uri) + uri&.normalized_host&.start_with?('xn--') + end + + def has_encoded_rtlo?(uri) + uri&.to_s&.include?(ENCODED_RTLO) + end end end end diff --git a/lib/banzai/pipeline/email_pipeline.rb b/lib/banzai/pipeline/email_pipeline.rb index 0f4dd9d143d..13e6a990407 100644 --- a/lib/banzai/pipeline/email_pipeline.rb +++ b/lib/banzai/pipeline/email_pipeline.rb @@ -12,6 +12,7 @@ module Banzai def self.transform_context(context) super(context).merge( only_path: false, + emailable_links: true, no_sourcepos: true ) end diff --git a/spec/lib/banzai/filter/autolink_filter_spec.rb b/spec/lib/banzai/filter/autolink_filter_spec.rb index 7a457403b51..6217381c491 100644 --- a/spec/lib/banzai/filter/autolink_filter_spec.rb +++ b/spec/lib/banzai/filter/autolink_filter_spec.rb @@ -188,6 +188,22 @@ describe Banzai::Filter::AutolinkFilter do expect(doc.at_css('a')['class']).to eq 'custom' end + it 'escapes RTLO and other characters' do + # rendered text looks like "http://example.com/evilexe.mp3" + evil_link = "#{link}evil\u202E3pm.exe" + doc = filter("#{evil_link}") + + expect(doc.at_css('a')['href']).to eq "http://about.gitlab.com/evil%E2%80%AE3pm.exe" + end + + it 'encodes international domains' do + link = "http://one😄two.com" + expected = "http://one%F0%9F%98%84two.com" + doc = filter(link) + + expect(doc.at_css('a')['href']).to eq expected + end + described_class::IGNORE_PARENTS.each do |elem| it "ignores valid links contained inside '#{elem}' element" do exp = act = "<#{elem}>See #{link}</#{elem}>" diff --git a/spec/lib/banzai/filter/external_link_filter_spec.rb b/spec/lib/banzai/filter/external_link_filter_spec.rb index e6dae8d5382..2acbe05f082 100644 --- a/spec/lib/banzai/filter/external_link_filter_spec.rb +++ b/spec/lib/banzai/filter/external_link_filter_spec.rb @@ -62,6 +62,13 @@ describe Banzai::Filter::ExternalLinkFilter do expect(doc.to_html).to eq(expected) end + + it 'adds rel and target to improperly formatted autolinks' do + doc = filter %q(<p><a href="mailto://jblogs@example.com">mailto://jblogs@example.com</a></p>) + expected = %q(<p><a href="mailto://jblogs@example.com" rel="nofollow noreferrer noopener" target="_blank">mailto://jblogs@example.com</a></p>) + + expect(doc.to_html).to eq(expected) + end end context 'for links with a username' do @@ -112,4 +119,62 @@ describe Banzai::Filter::ExternalLinkFilter do it_behaves_like 'an external link with rel attribute' end + + context 'links with RTLO character' do + # In rendered text this looks like "http://example.com/evilexe.mp3" + let(:doc) { filter %Q(<a href="http://example.com/evil%E2%80%AE3pm.exe">http://example.com/evil\u202E3pm.exe</a>) } + + it_behaves_like 'an external link with rel attribute' + + it 'escapes RTLO in link text' do + expected = %q(http://example.com/evil%E2%80%AE3pm.exe</a>) + + expect(doc.to_html).to include(expected) + end + + it 'does not mangle the link text' do + doc = filter %Q(<a href="http://example.com">One<span>and</span>\u202Eexe.mp3</a>) + + expect(doc.to_html).to include('One<span>and</span>%E2%80%AEexe.mp3</a>') + end + end + + context 'for generated autolinks' do + context 'with an IDN character' do + let(:doc) { filter(%q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>)) } + let(:doc_email) { filter(%q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>), emailable_links: true) } + + it_behaves_like 'an external link with rel attribute' + + it 'does not change the link text' do + expect(doc.to_html).to include('http://exa😄mple.com</a>') + end + + it 'uses punycode for emails' do + expect(doc_email.to_html).to include('http://xn--example-6p25f.com/</a>') + end + end + end + + context 'for links that look malicious' do + context 'with an IDN character' do + let(:doc) { filter %q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>) } + + it 'adds a toolip with punycode' do + expect(doc.to_html).to include('http://exa😄mple.com</a>') + expect(doc.to_html).to include('class="has-tooltip"') + expect(doc.to_html).to include('title="http://xn--example-6p25f.com/"') + end + end + + context 'with RTLO character' do + let(:doc) { filter %q(<a href="http://example.com/evil%E2%80%AE3pm.exe">Evil Test</a>) } + + it 'adds a toolip with punycode' do + expect(doc.to_html).to include('Evil Test</a>') + expect(doc.to_html).to include('class="has-tooltip"') + expect(doc.to_html).to include('title="http://example.com/evil%E2%80%AE3pm.exe"') + end + end + end end diff --git a/spec/lib/banzai/pipeline/email_pipeline_spec.rb b/spec/lib/banzai/pipeline/email_pipeline_spec.rb index 6a11ca2f9d5..b99161109eb 100644 --- a/spec/lib/banzai/pipeline/email_pipeline_spec.rb +++ b/spec/lib/banzai/pipeline/email_pipeline_spec.rb @@ -10,5 +10,19 @@ describe Banzai::Pipeline::EmailPipeline do expect(described_class.filters).not_to be_empty expect(described_class.filters).not_to include(Banzai::Filter::ImageLazyLoadFilter) end + + it 'shows punycode for autolinks' do + examples = %W[ + http://one😄two.com + http://\u0261itlab.com + ] + + examples.each do |markdown| + result = described_class.call(markdown, project: nil)[:output] + link = result.css('a').first + + expect(link.content).to include('http://xn--') + end + end end end diff --git a/spec/lib/banzai/pipeline/full_pipeline_spec.rb b/spec/lib/banzai/pipeline/full_pipeline_spec.rb index aa503b6e1d5..3d3aa64d630 100644 --- a/spec/lib/banzai/pipeline/full_pipeline_spec.rb +++ b/spec/lib/banzai/pipeline/full_pipeline_spec.rb @@ -59,4 +59,42 @@ describe Banzai::Pipeline::FullPipeline do expect(html.lines.map(&:strip).join("\n")).to eq filtered_footnote end end + + describe 'links are detected as malicious' do + it 'has tooltips for malicious links' do + examples = %W[ + http://example.com/evil\u202E3pm.exe + [evilexe.mp3](http://example.com/evil\u202E3pm.exe) + rdar://localhost.com/\u202E3pm.exe + http://one😄two.com + [Evil-Test](http://one😄two.com) + http://\u0261itlab.com + [Evil-GitLab-link](http://\u0261itlab.com) + ![Evil-GitLab-link](http://\u0261itlab.com.png) + ] + + examples.each do |markdown| + result = described_class.call(markdown, project: nil)[:output] + link = result.css('a').first + + expect(link[:class]).to include('has-tooltip') + end + end + + it 'has no tooltips for safe links' do + examples = %w[ + http://example.com + [Safe-Test](http://example.com) + https://commons.wikimedia.org/wiki/File:اسكرام_2_-_تمنراست.jpg + [Wikipedia-link](https://commons.wikimedia.org/wiki/File:اسكرام_2_-_تمنراست.jpg) + ] + + examples.each do |markdown| + result = described_class.call(markdown, project: nil)[:output] + link = result.css('a').first + + expect(link[:class]).to be_nil + end + end + end end |