summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--changelogs/unreleased/security-2769-idn-homograph-attack.yml5
-rw-r--r--lib/banzai/filter/autolink_filter.rb13
-rw-r--r--lib/banzai/filter/external_link_filter.rb85
-rw-r--r--lib/banzai/pipeline/email_pipeline.rb1
-rw-r--r--spec/lib/banzai/filter/autolink_filter_spec.rb16
-rw-r--r--spec/lib/banzai/filter/external_link_filter_spec.rb65
-rw-r--r--spec/lib/banzai/pipeline/email_pipeline_spec.rb14
-rw-r--r--spec/lib/banzai/pipeline/full_pipeline_spec.rb38
8 files changed, 226 insertions, 11 deletions
diff --git a/changelogs/unreleased/security-2769-idn-homograph-attack.yml b/changelogs/unreleased/security-2769-idn-homograph-attack.yml
new file mode 100644
index 00000000000..a014b522c96
--- /dev/null
+++ b/changelogs/unreleased/security-2769-idn-homograph-attack.yml
@@ -0,0 +1,5 @@
+---
+title: Make potentially malicious links more visible in the UI and scrub RTLO chars from links
+merge_request: 2770
+author:
+type: security
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb
index deda4b1872e..f3061bad4ff 100644
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -8,6 +8,10 @@ module Banzai
#
# Based on HTML::Pipeline::AutolinkFilter
#
+ # Note that our CommonMark parser, `commonmarker` (using the autolink extension)
+ # handles standard autolinking, like http/https. We detect additional
+ # schemes (smb, rdar, etc).
+ #
# Context options:
# :autolink - Boolean, skips all processing done by this filter when false
# :link_attr - Hash of attributes for the generated links
@@ -107,10 +111,13 @@ module Banzai
end
end
- # match has come from node.to_html above, so we know it's encoded
- # correctly.
+ # Since this came from a Text node, make sure the new href is encoded.
+ # `commonmarker` percent encodes the domains of links it handles, so
+ # do the same (instead of using `normalized_encode`).
+ href_safe = Addressable::URI.encode(match).html_safe
+
html_safe_match = match.html_safe
- options = link_options.merge(href: html_safe_match)
+ options = link_options.merge(href: href_safe)
content_tag(:a, html_safe_match, options) + dropped
end
diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb
index 4f60b6f84c6..61ee3eac216 100644
--- a/lib/banzai/filter/external_link_filter.rb
+++ b/lib/banzai/filter/external_link_filter.rb
@@ -4,17 +4,29 @@ module Banzai
module Filter
# HTML Filter to modify the attributes of external links
class ExternalLinkFilter < HTML::Pipeline::Filter
- SCHEMES = ['http', 'https', nil].freeze
+ SCHEMES = ['http', 'https', nil].freeze
+ RTLO = "\u202E".freeze
+ ENCODED_RTLO = '%E2%80%AE'.freeze
def call
links.each do |node|
- uri = uri(node['href'].to_s)
-
- node.set_attribute('href', uri.to_s) if uri
+ # URI.parse does stricter checking on the url than Addressable,
+ # such as on `mailto:` links. Since we've been using it, do an
+ # initial parse for validity and then use Addressable
+ # for IDN support, etc
+ uri = uri_strict(node['href'].to_s)
+ if uri
+ node.set_attribute('href', uri.to_s)
+ addressable_uri = addressable_uri(node['href'])
+ else
+ addressable_uri = nil
+ end
- if SCHEMES.include?(uri&.scheme) && !internal_url?(uri)
- node.set_attribute('rel', 'nofollow noreferrer noopener')
- node.set_attribute('target', '_blank')
+ unless internal_url?(addressable_uri)
+ punycode_autolink_node!(addressable_uri, node)
+ sanitize_link_text!(node)
+ add_malicious_tooltip!(addressable_uri, node)
+ add_nofollow!(addressable_uri, node)
end
end
@@ -23,12 +35,18 @@ module Banzai
private
- def uri(href)
+ def uri_strict(href)
URI.parse(href)
rescue URI::Error
nil
end
+ def addressable_uri(href)
+ Addressable::URI.parse(href)
+ rescue Addressable::URI::InvalidURIError
+ nil
+ end
+
def links
query = 'descendant-or-self::a[@href and not(@href = "")]'
doc.xpath(query)
@@ -45,6 +63,57 @@ module Banzai
def internal_url
@internal_url ||= URI.parse(Gitlab.config.gitlab.url)
end
+
+ # Only replace an autolink with an IDN with it's punycode
+ # version if we need emailable links. Otherwise let it
+ # be shown normally and the tooltips will show the
+ # punycode version.
+ def punycode_autolink_node!(uri, node)
+ return unless uri
+ return unless context[:emailable_links]
+
+ unencoded_uri_str = Addressable::URI.unencode(node['href'])
+
+ if unencoded_uri_str == node.content && idn?(uri)
+ node.content = uri.normalize
+ end
+ end
+
+ # escape any right-to-left (RTLO) characters in link text
+ def sanitize_link_text!(node)
+ node.inner_html = node.inner_html.gsub(RTLO, ENCODED_RTLO)
+ end
+
+ # If the domain is an international domain name (IDN),
+ # let's expose with a tooltip in case it's intended
+ # to be malicious. This is particularly useful for links
+ # where the link text is not the same as the actual link.
+ # We will continue to show the unicode version of the domain
+ # in autolinked link text, which could contain emojis, etc.
+ #
+ # Also show the tooltip if the url contains the RTLO character,
+ # as this is an indicator of a malicious link
+ def add_malicious_tooltip!(uri, node)
+ if idn?(uri) || has_encoded_rtlo?(uri)
+ node.add_class('has-tooltip')
+ node.set_attribute('title', uri.normalize)
+ end
+ end
+
+ def add_nofollow!(uri, node)
+ if SCHEMES.include?(uri&.scheme)
+ node.set_attribute('rel', 'nofollow noreferrer noopener')
+ node.set_attribute('target', '_blank')
+ end
+ end
+
+ def idn?(uri)
+ uri&.normalized_host&.start_with?('xn--')
+ end
+
+ def has_encoded_rtlo?(uri)
+ uri&.to_s&.include?(ENCODED_RTLO)
+ end
end
end
end
diff --git a/lib/banzai/pipeline/email_pipeline.rb b/lib/banzai/pipeline/email_pipeline.rb
index 0f4dd9d143d..13e6a990407 100644
--- a/lib/banzai/pipeline/email_pipeline.rb
+++ b/lib/banzai/pipeline/email_pipeline.rb
@@ -12,6 +12,7 @@ module Banzai
def self.transform_context(context)
super(context).merge(
only_path: false,
+ emailable_links: true,
no_sourcepos: true
)
end
diff --git a/spec/lib/banzai/filter/autolink_filter_spec.rb b/spec/lib/banzai/filter/autolink_filter_spec.rb
index 7a457403b51..6217381c491 100644
--- a/spec/lib/banzai/filter/autolink_filter_spec.rb
+++ b/spec/lib/banzai/filter/autolink_filter_spec.rb
@@ -188,6 +188,22 @@ describe Banzai::Filter::AutolinkFilter do
expect(doc.at_css('a')['class']).to eq 'custom'
end
+ it 'escapes RTLO and other characters' do
+ # rendered text looks like "http://example.com/evilexe.mp3"
+ evil_link = "#{link}evil\u202E3pm.exe"
+ doc = filter("#{evil_link}")
+
+ expect(doc.at_css('a')['href']).to eq "http://about.gitlab.com/evil%E2%80%AE3pm.exe"
+ end
+
+ it 'encodes international domains' do
+ link = "http://one😄two.com"
+ expected = "http://one%F0%9F%98%84two.com"
+ doc = filter(link)
+
+ expect(doc.at_css('a')['href']).to eq expected
+ end
+
described_class::IGNORE_PARENTS.each do |elem|
it "ignores valid links contained inside '#{elem}' element" do
exp = act = "<#{elem}>See #{link}</#{elem}>"
diff --git a/spec/lib/banzai/filter/external_link_filter_spec.rb b/spec/lib/banzai/filter/external_link_filter_spec.rb
index e6dae8d5382..2acbe05f082 100644
--- a/spec/lib/banzai/filter/external_link_filter_spec.rb
+++ b/spec/lib/banzai/filter/external_link_filter_spec.rb
@@ -62,6 +62,13 @@ describe Banzai::Filter::ExternalLinkFilter do
expect(doc.to_html).to eq(expected)
end
+
+ it 'adds rel and target to improperly formatted autolinks' do
+ doc = filter %q(<p><a href="mailto://jblogs@example.com">mailto://jblogs@example.com</a></p>)
+ expected = %q(<p><a href="mailto://jblogs@example.com" rel="nofollow noreferrer noopener" target="_blank">mailto://jblogs@example.com</a></p>)
+
+ expect(doc.to_html).to eq(expected)
+ end
end
context 'for links with a username' do
@@ -112,4 +119,62 @@ describe Banzai::Filter::ExternalLinkFilter do
it_behaves_like 'an external link with rel attribute'
end
+
+ context 'links with RTLO character' do
+ # In rendered text this looks like "http://example.com/evilexe.mp3"
+ let(:doc) { filter %Q(<a href="http://example.com/evil%E2%80%AE3pm.exe">http://example.com/evil\u202E3pm.exe</a>) }
+
+ it_behaves_like 'an external link with rel attribute'
+
+ it 'escapes RTLO in link text' do
+ expected = %q(http://example.com/evil%E2%80%AE3pm.exe</a>)
+
+ expect(doc.to_html).to include(expected)
+ end
+
+ it 'does not mangle the link text' do
+ doc = filter %Q(<a href="http://example.com">One<span>and</span>\u202Eexe.mp3</a>)
+
+ expect(doc.to_html).to include('One<span>and</span>%E2%80%AEexe.mp3</a>')
+ end
+ end
+
+ context 'for generated autolinks' do
+ context 'with an IDN character' do
+ let(:doc) { filter(%q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>)) }
+ let(:doc_email) { filter(%q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>), emailable_links: true) }
+
+ it_behaves_like 'an external link with rel attribute'
+
+ it 'does not change the link text' do
+ expect(doc.to_html).to include('http://exa😄mple.com</a>')
+ end
+
+ it 'uses punycode for emails' do
+ expect(doc_email.to_html).to include('http://xn--example-6p25f.com/</a>')
+ end
+ end
+ end
+
+ context 'for links that look malicious' do
+ context 'with an IDN character' do
+ let(:doc) { filter %q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>) }
+
+ it 'adds a toolip with punycode' do
+ expect(doc.to_html).to include('http://exa😄mple.com</a>')
+ expect(doc.to_html).to include('class="has-tooltip"')
+ expect(doc.to_html).to include('title="http://xn--example-6p25f.com/"')
+ end
+ end
+
+ context 'with RTLO character' do
+ let(:doc) { filter %q(<a href="http://example.com/evil%E2%80%AE3pm.exe">Evil Test</a>) }
+
+ it 'adds a toolip with punycode' do
+ expect(doc.to_html).to include('Evil Test</a>')
+ expect(doc.to_html).to include('class="has-tooltip"')
+ expect(doc.to_html).to include('title="http://example.com/evil%E2%80%AE3pm.exe"')
+ end
+ end
+ end
end
diff --git a/spec/lib/banzai/pipeline/email_pipeline_spec.rb b/spec/lib/banzai/pipeline/email_pipeline_spec.rb
index 6a11ca2f9d5..b99161109eb 100644
--- a/spec/lib/banzai/pipeline/email_pipeline_spec.rb
+++ b/spec/lib/banzai/pipeline/email_pipeline_spec.rb
@@ -10,5 +10,19 @@ describe Banzai::Pipeline::EmailPipeline do
expect(described_class.filters).not_to be_empty
expect(described_class.filters).not_to include(Banzai::Filter::ImageLazyLoadFilter)
end
+
+ it 'shows punycode for autolinks' do
+ examples = %W[
+ http://one😄two.com
+ http://\u0261itlab.com
+ ]
+
+ examples.each do |markdown|
+ result = described_class.call(markdown, project: nil)[:output]
+ link = result.css('a').first
+
+ expect(link.content).to include('http://xn--')
+ end
+ end
end
end
diff --git a/spec/lib/banzai/pipeline/full_pipeline_spec.rb b/spec/lib/banzai/pipeline/full_pipeline_spec.rb
index aa503b6e1d5..3d3aa64d630 100644
--- a/spec/lib/banzai/pipeline/full_pipeline_spec.rb
+++ b/spec/lib/banzai/pipeline/full_pipeline_spec.rb
@@ -59,4 +59,42 @@ describe Banzai::Pipeline::FullPipeline do
expect(html.lines.map(&:strip).join("\n")).to eq filtered_footnote
end
end
+
+ describe 'links are detected as malicious' do
+ it 'has tooltips for malicious links' do
+ examples = %W[
+ http://example.com/evil\u202E3pm.exe
+ [evilexe.mp3](http://example.com/evil\u202E3pm.exe)
+ rdar://localhost.com/\u202E3pm.exe
+ http://one😄two.com
+ [Evil-Test](http://one😄two.com)
+ http://\u0261itlab.com
+ [Evil-GitLab-link](http://\u0261itlab.com)
+ ![Evil-GitLab-link](http://\u0261itlab.com.png)
+ ]
+
+ examples.each do |markdown|
+ result = described_class.call(markdown, project: nil)[:output]
+ link = result.css('a').first
+
+ expect(link[:class]).to include('has-tooltip')
+ end
+ end
+
+ it 'has no tooltips for safe links' do
+ examples = %w[
+ http://example.com
+ [Safe-Test](http://example.com)
+ https://commons.wikimedia.org/wiki/File:اسكرام_2_-_تمنراست.jpg
+ [Wikipedia-link](https://commons.wikimedia.org/wiki/File:اسكرام_2_-_تمنراست.jpg)
+ ]
+
+ examples.each do |markdown|
+ result = described_class.call(markdown, project: nil)[:output]
+ link = result.css('a').first
+
+ expect(link[:class]).to be_nil
+ end
+ end
+ end
end