From acc694ead6f8a7428efab126aa6b8a29b132db43 Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Fri, 26 Jul 2019 13:41:11 +0000 Subject: Extract SanitizeNodeLink and apply to WikiLinkFilter The SanitizationFilter was running before the WikiFilter. Since WikiFilter can modify links, we could see links that _should_ be stopped by SanitizationFilter being rendered on the page. I (kerrizor) had previously addressed the bug in: https://gitlab.com/gitlab-org/gitlab-ee/commit/7bc971915bbeadb950bb0e1f13510bf3038229a4 However, an additional exploit was discovered after that was merged. Working through the issue, we couldn't simply shuffle the order of filters, due to some implicit assumptions about the order of filters, so instead we've extracted the logic that sanitizes a Nokogiri-generated Node object, and applied it to the WikiLinkFilter as well. On moving filters around: Once we start moving around filters, we get cascading failures; fix one, another one crops up. Many of the existing filters in the WikiPipeline chain seem to assume that other filters have already done their work, and thus operate on a "transform anything that's left" basis; WikiFilter, for instance, assumes any link it finds in the markdown should be prepended with the wiki_base_path... but if it does that, it also turns `href="@user"` into `href="/path/to/wiki/@user"`, which the UserReferenceFilter doesn't see as a user reference it needs to transform into a user profile link. This is true for all the reference filters in the WikiPipeline. 
--- lib/banzai/filter/autolink_filter.rb | 11 ++------- lib/banzai/filter/base_sanitization_filter.rb | 32 ++------------------------ lib/banzai/filter/wiki_link_filter.rb | 15 +++++++++--- lib/banzai/filter/wiki_link_filter/rewriter.rb | 8 ------- 4 files changed, 16 insertions(+), 50 deletions(-) (limited to 'lib/banzai') diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index 56214043d87..5f2cbc24c60 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -18,6 +18,7 @@ module Banzai # class AutolinkFilter < HTML::Pipeline::Filter include ActionView::Helpers::TagHelper + include Gitlab::Utils::SanitizeNodeLink # Pattern to match text that should be autolinked. # @@ -72,19 +73,11 @@ module Banzai private - # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme - def contains_unsafe?(scheme) - return false unless scheme - - scheme = scheme.strip.downcase - Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) } - end - def autolink_match(match) # start by stripping out dangerous links begin uri = Addressable::URI.parse(match) - return match if contains_unsafe?(uri.scheme) + return match unless safe_protocol?(uri.scheme) rescue Addressable::URI::InvalidURIError return match end diff --git a/lib/banzai/filter/base_sanitization_filter.rb b/lib/banzai/filter/base_sanitization_filter.rb index 420e92cb1e8..2dabca3552d 100644 --- a/lib/banzai/filter/base_sanitization_filter.rb +++ b/lib/banzai/filter/base_sanitization_filter.rb @@ -11,6 +11,7 @@ module Banzai # Extends HTML::Pipeline::SanitizationFilter with common rules. 
class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter include Gitlab::Utils::StrongMemoize + extend Gitlab::Utils::SanitizeNodeLink UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze @@ -40,7 +41,7 @@ module Banzai # Allow any protocol in `a` elements # and then remove links with unsafe protocols whitelist[:protocols].delete('a') - whitelist[:transformers].push(self.class.remove_unsafe_links) + whitelist[:transformers].push(self.class.method(:remove_unsafe_links)) # Remove `rel` attribute from `a` elements whitelist[:transformers].push(self.class.remove_rel) @@ -54,35 +55,6 @@ module Banzai end class << self - def remove_unsafe_links - lambda do |env| - node = env[:node] - - return unless node.name == 'a' - return unless node.has_attribute?('href') - - begin - node['href'] = node['href'].strip - uri = Addressable::URI.parse(node['href']) - - return unless uri.scheme - - # Remove all invalid scheme characters before checking against the - # list of unsafe protocols. - # - # See https://tools.ietf.org/html/rfc3986#section-3.1 - scheme = uri.scheme - .strip - .downcase - .gsub(/[^A-Za-z0-9\+\.\-]+/, '') - - node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme) - rescue Addressable::URI::InvalidURIError - node.remove_attribute('href') - end - end - end - def remove_rel lambda do |env| if env[:node_name] == 'a' diff --git a/lib/banzai/filter/wiki_link_filter.rb b/lib/banzai/filter/wiki_link_filter.rb index 1728a442533..18947679b69 100644 --- a/lib/banzai/filter/wiki_link_filter.rb +++ b/lib/banzai/filter/wiki_link_filter.rb @@ -8,15 +8,19 @@ module Banzai # Context options: # :project_wiki class WikiLinkFilter < HTML::Pipeline::Filter + include Gitlab::Utils::SanitizeNodeLink + def call return doc unless project_wiki? 
- doc.search('a:not(.gfm)').each { |el| process_link_attr(el.attribute('href')) } - doc.search('video').each { |el| process_link_attr(el.attribute('src')) } + doc.search('a:not(.gfm)').each { |el| process_link(el.attribute('href'), el) } + + doc.search('video').each { |el| process_link(el.attribute('src'), el) } + doc.search('img').each do |el| attr = el.attribute('data-src') || el.attribute('src') - process_link_attr(attr) + process_link(attr, el) end doc @@ -24,6 +28,11 @@ module Banzai protected + def process_link(link_attr, node) + process_link_attr(link_attr) + remove_unsafe_links({ node: node }, remove_invalid_links: false) + end + def project_wiki? !context[:project_wiki].nil? end diff --git a/lib/banzai/filter/wiki_link_filter/rewriter.rb b/lib/banzai/filter/wiki_link_filter/rewriter.rb index 77b5053f38c..f4cc8beeb52 100644 --- a/lib/banzai/filter/wiki_link_filter/rewriter.rb +++ b/lib/banzai/filter/wiki_link_filter/rewriter.rb @@ -4,8 +4,6 @@ module Banzai module Filter class WikiLinkFilter < HTML::Pipeline::Filter class Rewriter - UNSAFE_SLUG_REGEXES = [/\Ajavascript:/i].freeze - def initialize(link_string, wiki:, slug:) @uri = Addressable::URI.parse(link_string) @wiki_base_path = wiki && wiki.wiki_base_path @@ -37,8 +35,6 @@ module Banzai # Of the form `./link`, `../link`, or similar def apply_hierarchical_link_rules! - return if slug_considered_unsafe? - @uri = Addressable::URI.join(@slug, @uri) if @uri.to_s[0] == '.' end @@ -58,10 +54,6 @@ module Banzai def repository_upload? @uri.relative? && @uri.path.starts_with?(Wikis::CreateAttachmentService::ATTACHMENT_PATH) end - - def slug_considered_unsafe? - UNSAFE_SLUG_REGEXES.any? { |r| r.match?(@slug) } - end end end end -- cgit v1.2.1