diff options
author | Alex Groleau <agroleau@gitlab.com> | 2019-08-27 12:41:39 -0400 |
---|---|---|
committer | Alex Groleau <agroleau@gitlab.com> | 2019-08-27 12:41:39 -0400 |
commit | aa01f092829facd1044ad02f334422b7dbdc8b0e (patch) | |
tree | a754bf2497820432df7da0f2108bb7527a8dd7b8 /lib/gitlab/utils/sanitize_node_link.rb | |
parent | a1d9c9994a9a4d79b824c3fd9322688303ac8b03 (diff) | |
parent | 6b10779053ff4233c7a64c5ab57754fce63f6710 (diff) | |
download | gitlab-ce-runner-metrics-extractor.tar.gz |
Merge branch 'master' of gitlab_gitlab:gitlab-org/gitlab-ce (runner-metrics-extractor)
Diffstat (limited to 'lib/gitlab/utils/sanitize_node_link.rb')
-rw-r--r-- | lib/gitlab/utils/sanitize_node_link.rb | 61 |
1 files changed, 61 insertions, 0 deletions
# frozen_string_literal: true

require_dependency 'gitlab/utils'

module Gitlab
  module Utils
    # Mixin that removes link-carrying attributes (see ATTRS_TO_SANITIZE)
    # from a node when their URI scheme is one of UNSAFE_PROTOCOLS.
    module SanitizeNodeLink
      UNSAFE_PROTOCOLS  = %w(data javascript vbscript).freeze
      ATTRS_TO_SANITIZE = %w(href src data-src).freeze

      # Sanitizes the node found under `env[:node]` and each of its direct
      # children.
      #
      # env                  - object responding to `[]` that yields the node
      #                        under the `:node` key (presumably a sanitizer
      #                        transformer environment; confirm with callers).
      # remove_invalid_links - when true, attributes whose value cannot be
      #                        parsed as a URI are removed as well.
      def remove_unsafe_links(env, remove_invalid_links: true)
        root = env[:node]

        sanitize_node(node: root, remove_invalid_links: remove_invalid_links)

        # Elements such as <video></video> carry scannable attributes on
        # their child elements, so those are sanitized too.
        root.children.each do |child|
          sanitize_node(node: child, remove_invalid_links: remove_invalid_links)
        end
      end

      # Returns true when `scheme` is not one of UNSAFE_PROTOCOLS.
      #
      # The scheme is stripped, downcased and cleared of every character
      # outside letters, `+`, `.` and `-` before the comparison, so padded or
      # obfuscated spellings still match the unsafe list. A nil/false scheme
      # is never considered safe.
      #
      # See https://tools.ietf.org/html/rfc3986#section-3.1
      def safe_protocol?(scheme)
        return false unless scheme

        normalized = scheme.strip.downcase.gsub(/[^A-Za-z\+\.\-]+/, '')

        !UNSAFE_PROTOCOLS.include?(normalized)
      end

      private

      # For every attribute in ATTRS_TO_SANITIZE present on `node`:
      # trims the value in place, parses it, and removes the attribute when
      # the parsed URI has a scheme that is not safe. Values without a scheme
      # are left alone. Unparseable values are removed only when
      # `remove_invalid_links` is true.
      def sanitize_node(node:, remove_invalid_links: true)
        ATTRS_TO_SANITIZE.each do |attr_name|
          next unless node.has_attribute?(attr_name)

          begin
            # The trimmed value is written back even when the link is kept.
            node[attr_name] = node[attr_name].strip
            parsed = Addressable::URI.parse(node[attr_name])

            link_scheme = parsed.scheme
            node.remove_attribute(attr_name) if link_scheme && !safe_protocol?(link_scheme)
          rescue Addressable::URI::InvalidURIError
            node.remove_attribute(attr_name) if remove_invalid_links
          end
        end
      end
    end
  end
end