summaryrefslogtreecommitdiff
path: root/lib/gitlab/utils/sanitize_node_link.rb
diff options
context:
space:
mode:
authorAlex Groleau <agroleau@gitlab.com>2019-08-27 12:41:39 -0400
committerAlex Groleau <agroleau@gitlab.com>2019-08-27 12:41:39 -0400
commitaa01f092829facd1044ad02f334422b7dbdc8b0e (patch)
treea754bf2497820432df7da0f2108bb7527a8dd7b8 /lib/gitlab/utils/sanitize_node_link.rb
parenta1d9c9994a9a4d79b824c3fd9322688303ac8b03 (diff)
parent6b10779053ff4233c7a64c5ab57754fce63f6710 (diff)
downloadgitlab-ce-runner-metrics-extractor.tar.gz
Merge branch 'master' of gitlab_gitlab:gitlab-org/gitlab-cerunner-metrics-extractor
Diffstat (limited to 'lib/gitlab/utils/sanitize_node_link.rb')
-rw-r--r--lib/gitlab/utils/sanitize_node_link.rb61
1 files changed, 61 insertions, 0 deletions
diff --git a/lib/gitlab/utils/sanitize_node_link.rb b/lib/gitlab/utils/sanitize_node_link.rb
new file mode 100644
index 00000000000..620d71a7814
--- /dev/null
+++ b/lib/gitlab/utils/sanitize_node_link.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+require_dependency 'gitlab/utils'
+
+module Gitlab
+ module Utils
+ module SanitizeNodeLink
+ UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
+ ATTRS_TO_SANITIZE = %w(href src data-src).freeze
+
+ def remove_unsafe_links(env, remove_invalid_links: true)
+ node = env[:node]
+
+ sanitize_node(node: node, remove_invalid_links: remove_invalid_links)
+
+ # HTML entities such as <video></video> have scannable attrs in
+ # children elements, which also need to be sanitized.
+ #
+ node.children.each do |child_node|
+ sanitize_node(node: child_node, remove_invalid_links: remove_invalid_links)
+ end
+ end
+
+ # Remove all invalid scheme characters before checking against the
+ # list of unsafe protocols.
+ #
+ # See https://tools.ietf.org/html/rfc3986#section-3.1
+ #
+ def safe_protocol?(scheme)
+ return false unless scheme
+
+ scheme = scheme
+ .strip
+ .downcase
+ .gsub(/[^A-Za-z\+\.\-]+/, '')
+
+ UNSAFE_PROTOCOLS.none?(scheme)
+ end
+
+ private
+
+ def sanitize_node(node:, remove_invalid_links: true)
+ ATTRS_TO_SANITIZE.each do |attr|
+ next unless node.has_attribute?(attr)
+
+ begin
+ node[attr] = node[attr].strip
+ uri = Addressable::URI.parse(node[attr])
+
+ next unless uri.scheme
+ next if safe_protocol?(uri.scheme)
+
+ node.remove_attribute(attr)
+ rescue Addressable::URI::InvalidURIError
+ node.remove_attribute(attr) if remove_invalid_links
+ end
+ end
+ end
+ end
+ end
+end