summary | refs | log | tree | commit | diff
path: root/lib/gitlab/sanitizers/svg.rb
blob: 3e7056878734cd86e2aea6b2ec0380bf0659d656 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
module Gitlab
  module Sanitizers
    module SVG
      # Parses +data+ with Loofah as an XML document, scrubs it with a fresh
      # Scrubber and returns the string form of the result.
      def self.clean(data)
        document = Loofah.xml_document(data)
        scrubbed = document.scrub!(Scrubber.new)
        scrubbed.to_s
      end

      # Loofah scrubber that reduces a document to the elements and attributes
      # listed in Whitelist.
      #
      # For every node it is handed:
      # * a node whose name is not in Whitelist::ALLOWED_ELEMENTS is removed;
      # * on a kept node, attributes listed for that element in
      #   Whitelist::ALLOWED_ATTRIBUTES stay, except an `xlink:href` whose value
      #   does not start with '#';
      # * `data-*` attributes stay only on elements named in
      #   Whitelist::ALLOWED_DATA_ATTRIBUTES_IN_ELEMENTS, and only when the
      #   (prefix-qualified) name matches DATA_ATTR_PATTERN;
      # * every other attribute is removed.
      class Scrubber < Loofah::Scrubber
        # http://www.whatwg.org/specs/web-apps/current-work/multipage/elements.html#embedding-custom-non-visible-data-with-the-data-*-attributes
        DATA_ATTR_PATTERN = /\Adata-(?!xml)[a-z_][\w.\u00E0-\u00F6\u00F8-\u017F\u01DD-\u02AF-]*\z/u

        # Scrubs a single node in place.
        #
        # Returns STOP (inherited from Loofah::Scrubber) after removing a
        # disallowed node, so the traversal does not descend into the subtree
        # that was just detached. Any other return value lets Loofah continue
        # with the node's children.
        def scrub(node)
          unless Whitelist::ALLOWED_ELEMENTS.include?(node.name)
            node.unlink
            return STOP
          end

          # Fail closed: an allowed element that has no attribute list is
          # treated as having no allowed attributes, rather than keeping
          # whatever attributes the input carried.
          valid_attributes = Whitelist::ALLOWED_ATTRIBUTES[node.name] || []
          data_attributes_allowed = Whitelist::ALLOWED_DATA_ATTRIBUTES_IN_ELEMENTS.include?(node.name)

          node.attribute_nodes.each do |attr|
            attr_name = attribute_name_with_namespace(attr)

            if valid_attributes.include?(attr_name)
              # Even when whitelisted, xlink:href may only reference an internal id.
              attr.unlink if attr_name == 'xlink:href' && unsafe_href?(attr)
            else
              # Arbitrary data attributes are allowed on selected elements, but
              # only if the attribute really is a valid data attribute.
              keep = data_attributes_allowed && data_attribute?(attr) && attr_name =~ DATA_ATTR_PATTERN
              attr.unlink unless keep
            end
          end
        end

        # Returns the attribute name as "prefix:name" when the attribute has a
        # namespace, and the bare name otherwise.
        def attribute_name_with_namespace(attr)
          if attr.namespace
            "#{attr.namespace.prefix}:#{attr.name}"
          else
            attr.name
          end
        end

        # True when the attribute value is anything other than a '#'-prefixed
        # (same-document) reference.
        def unsafe_href?(attr)
          !attr.value.start_with?('#')
        end

        # True when the attribute's local name begins with 'data-'.
        def data_attribute?(attr)
          attr.name.start_with?('data-')
        end
      end
    end
  end
end