summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/banzai/filter/autolink_filter.rb11
-rw-r--r--lib/banzai/filter/base_sanitization_filter.rb32
-rw-r--r--lib/banzai/filter/wiki_link_filter.rb15
-rw-r--r--lib/banzai/filter/wiki_link_filter/rewriter.rb8
-rw-r--r--lib/gitlab/utils/sanitize_node_link.rb61
5 files changed, 77 insertions, 50 deletions
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb
index 56214043d87..5f2cbc24c60 100644
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -18,6 +18,7 @@ module Banzai
#
class AutolinkFilter < HTML::Pipeline::Filter
include ActionView::Helpers::TagHelper
+ include Gitlab::Utils::SanitizeNodeLink
# Pattern to match text that should be autolinked.
#
@@ -72,19 +73,11 @@ module Banzai
private
- # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
- def contains_unsafe?(scheme)
- return false unless scheme
-
- scheme = scheme.strip.downcase
- Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
- end
-
def autolink_match(match)
# start by stripping out dangerous links
begin
uri = Addressable::URI.parse(match)
- return match if contains_unsafe?(uri.scheme)
+ return match unless safe_protocol?(uri.scheme)
rescue Addressable::URI::InvalidURIError
return match
end
diff --git a/lib/banzai/filter/base_sanitization_filter.rb b/lib/banzai/filter/base_sanitization_filter.rb
index 420e92cb1e8..2dabca3552d 100644
--- a/lib/banzai/filter/base_sanitization_filter.rb
+++ b/lib/banzai/filter/base_sanitization_filter.rb
@@ -11,6 +11,7 @@ module Banzai
# Extends HTML::Pipeline::SanitizationFilter with common rules.
class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter
include Gitlab::Utils::StrongMemoize
+ extend Gitlab::Utils::SanitizeNodeLink
UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
@@ -40,7 +41,7 @@ module Banzai
# Allow any protocol in `a` elements
# and then remove links with unsafe protocols
whitelist[:protocols].delete('a')
- whitelist[:transformers].push(self.class.remove_unsafe_links)
+ whitelist[:transformers].push(self.class.method(:remove_unsafe_links))
# Remove `rel` attribute from `a` elements
whitelist[:transformers].push(self.class.remove_rel)
@@ -54,35 +55,6 @@ module Banzai
end
class << self
- def remove_unsafe_links
- lambda do |env|
- node = env[:node]
-
- return unless node.name == 'a'
- return unless node.has_attribute?('href')
-
- begin
- node['href'] = node['href'].strip
- uri = Addressable::URI.parse(node['href'])
-
- return unless uri.scheme
-
- # Remove all invalid scheme characters before checking against the
- # list of unsafe protocols.
- #
- # See https://tools.ietf.org/html/rfc3986#section-3.1
- scheme = uri.scheme
- .strip
- .downcase
- .gsub(/[^A-Za-z0-9\+\.\-]+/, '')
-
- node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme)
- rescue Addressable::URI::InvalidURIError
- node.remove_attribute('href')
- end
- end
- end
-
def remove_rel
lambda do |env|
if env[:node_name] == 'a'
diff --git a/lib/banzai/filter/wiki_link_filter.rb b/lib/banzai/filter/wiki_link_filter.rb
index 1728a442533..18947679b69 100644
--- a/lib/banzai/filter/wiki_link_filter.rb
+++ b/lib/banzai/filter/wiki_link_filter.rb
@@ -8,15 +8,19 @@ module Banzai
# Context options:
# :project_wiki
class WikiLinkFilter < HTML::Pipeline::Filter
+ include Gitlab::Utils::SanitizeNodeLink
+
def call
return doc unless project_wiki?
- doc.search('a:not(.gfm)').each { |el| process_link_attr(el.attribute('href')) }
- doc.search('video').each { |el| process_link_attr(el.attribute('src')) }
+ doc.search('a:not(.gfm)').each { |el| process_link(el.attribute('href'), el) }
+
+ doc.search('video').each { |el| process_link(el.attribute('src'), el) }
+
doc.search('img').each do |el|
attr = el.attribute('data-src') || el.attribute('src')
- process_link_attr(attr)
+ process_link(attr, el)
end
doc
@@ -24,6 +28,11 @@ module Banzai
protected
+ def process_link(link_attr, node) # process the attribute first, then sanitize the node it belongs to
+ process_link_attr(link_attr)
+ remove_unsafe_links({ node: node }, remove_invalid_links: false) # unparsable URIs are kept; unsafe schemes are still removed
+ end
+
def project_wiki?
!context[:project_wiki].nil?
end
diff --git a/lib/banzai/filter/wiki_link_filter/rewriter.rb b/lib/banzai/filter/wiki_link_filter/rewriter.rb
index 77b5053f38c..f4cc8beeb52 100644
--- a/lib/banzai/filter/wiki_link_filter/rewriter.rb
+++ b/lib/banzai/filter/wiki_link_filter/rewriter.rb
@@ -4,8 +4,6 @@ module Banzai
module Filter
class WikiLinkFilter < HTML::Pipeline::Filter
class Rewriter
- UNSAFE_SLUG_REGEXES = [/\Ajavascript:/i].freeze
-
def initialize(link_string, wiki:, slug:)
@uri = Addressable::URI.parse(link_string)
@wiki_base_path = wiki && wiki.wiki_base_path
@@ -37,8 +35,6 @@ module Banzai
# Of the form `./link`, `../link`, or similar
def apply_hierarchical_link_rules!
- return if slug_considered_unsafe?
-
@uri = Addressable::URI.join(@slug, @uri) if @uri.to_s[0] == '.'
end
@@ -58,10 +54,6 @@ module Banzai
def repository_upload?
@uri.relative? && @uri.path.starts_with?(Wikis::CreateAttachmentService::ATTACHMENT_PATH)
end
-
- def slug_considered_unsafe?
- UNSAFE_SLUG_REGEXES.any? { |r| r.match?(@slug) }
- end
end
end
end
diff --git a/lib/gitlab/utils/sanitize_node_link.rb b/lib/gitlab/utils/sanitize_node_link.rb
new file mode 100644
index 00000000000..620d71a7814
--- /dev/null
+++ b/lib/gitlab/utils/sanitize_node_link.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+require_dependency 'gitlab/utils'
+
+module Gitlab
+ module Utils
+ module SanitizeNodeLink
+ UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze # schemes that cause an attribute to be removed
+ ATTRS_TO_SANITIZE = %w(href src data-src).freeze # attributes inspected on every sanitized node
+ # Sanitizes env[:node] and its direct children; unparsable URIs are removed only if remove_invalid_links is true.
+ def remove_unsafe_links(env, remove_invalid_links: true)
+ node = env[:node]
+
+ sanitize_node(node: node, remove_invalid_links: remove_invalid_links)
+
+ # HTML elements such as <video></video> can carry scannable attrs on
+ # their child elements, which also need to be sanitized. Only the direct
+ # children are visited here; deeper descendants are not walked.
+ node.children.each do |child_node|
+ sanitize_node(node: child_node, remove_invalid_links: remove_invalid_links)
+ end
+ end
+
+ # Returns true unless the normalized scheme is one of UNSAFE_PROTOCOLS;
+ # a nil scheme returns false. Normalizing strips, downcases and drops
+ # every character other than ASCII letters, "+", "." and "-". Digits are
+ # dropped too, although https://tools.ietf.org/html/rfc3986#section-3.1
+ # allows them in a scheme, so e.g. "data1" is treated like "data".
+ def safe_protocol?(scheme)
+ return false unless scheme
+
+ scheme = scheme
+ .strip
+ .downcase
+ .gsub(/[^A-Za-z\+\.\-]+/, '')
+
+ UNSAFE_PROTOCOLS.none?(scheme)
+ end
+
+ private
+ # Strips each ATTRS_TO_SANITIZE value in place and removes the attribute when its scheme is unsafe.
+ def sanitize_node(node:, remove_invalid_links: true)
+ ATTRS_TO_SANITIZE.each do |attr|
+ next unless node.has_attribute?(attr)
+
+ begin
+ node[attr] = node[attr].strip
+ uri = Addressable::URI.parse(node[attr])
+
+ next unless uri.scheme # no scheme: keep the (stripped) attribute
+ next if safe_protocol?(uri.scheme)
+
+ node.remove_attribute(attr)
+ rescue Addressable::URI::InvalidURIError
+ node.remove_attribute(attr) if remove_invalid_links # callers may opt to keep unparsable values
+ end
+ end
+ end
+ end
+ end
+end