lib/banzai/filter/reference_filter.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215

# frozen_string_literal: true

# Generated HTML is transformed back to GFM by app/assets/javascripts/behaviors/markdown/nodes/reference.js
module Banzai
  module Filter
    # Base class for GitLab Flavored Markdown reference filters.
    #
    # References within <pre>, <code>, <a>, and <style> elements are ignored.
    #
    # Context options:
    #   :project (required) - Current project, ignored if reference is cross-project.
    #   :only_path          - Generate path-only links.
    class ReferenceFilter < HTML::Pipeline::Filter
      include RequestStoreReferenceCache
      include OutputSafety

      class << self
        attr_accessor :reference_type

        def call(doc, context = nil, result = nil)
          new(doc, context, result).call_and_update_nodes
        end
      end

      def initialize(doc, context = nil, result = nil)
        super

        @new_nodes = {}
        @nodes = self.result[:reference_filter_nodes]
      end

      def call_and_update_nodes
        with_update_nodes { call }
      end

      # Returns a data attribute String to attach to a reference link
      #
      # attributes - Hash, where the key becomes the data attribute name and the
      #              value is the data attribute value
      #
      # Examples:
      #
      #   data_attribute(project: 1, issue: 2)
      #   # => "data-reference-type=\"SomeReferenceFilter\" data-project=\"1\" data-issue=\"2\""
      #
      #   data_attribute(project: 3, merge_request: 4)
      #   # => "data-reference-type=\"SomeReferenceFilter\" data-project=\"3\" data-merge-request=\"4\""
      #
      # Returns a String
      def data_attribute(attributes = {})
        attributes = attributes.reject { |_, v| v.nil? }

        attributes[:reference_type] ||= self.class.reference_type
        attributes[:container] ||= 'body'
        attributes[:placement] ||= 'top'
        attributes.delete(:original) if context[:no_original_data]
        attributes.map do |key, value|
          %Q(data-#{key.to_s.dasherize}="#{escape_once(value)}")
        end.join(' ')
      end

      def ignore_ancestor_query
        @ignore_ancestor_query ||= begin
          parents = %w(pre code a style)
          parents << 'blockquote' if context[:ignore_blockquotes]

          parents.map { |n| "ancestor::#{n}" }.join(' or ')
        end
      end

      def project
        context[:project]
      end

      def group
        context[:group]
      end

      def user
        context[:user]
      end

      def skip_project_check?
        context[:skip_project_check]
      end

      def reference_class(type, tooltip: true)
        gfm_klass = "gfm gfm-#{type}"

        return gfm_klass unless tooltip

        "#{gfm_klass} has-tooltip"
      end

      # Ensure that a :project key exists in context
      #
      # Note that while the key might exist, its value could be nil!
      def validate
        needs :project unless skip_project_check?
      end

      # Iterates over all <a> and text() nodes in a document.
      #
      # Nodes are skipped whenever their ancestor is one of the nodes returned
      # by `ignore_ancestor_query`. Link tags are not processed if they have a
      # "gfm" class or the "href" attribute is empty.
      def each_node
        return to_enum(__method__) unless block_given?

        doc.xpath(query).each do |node|
          yield node
        end
      end

      # Returns an Array containing all HTML nodes.
      def nodes
        @nodes ||= each_node.to_a
      end

      # Yields the link's URL and inner HTML whenever the node is a valid <a> tag.
      def yield_valid_link(node)
        link = unescape_link(node.attr('href').to_s)
        inner_html = node.inner_html

        return unless link.force_encoding('UTF-8').valid_encoding?

        yield link, inner_html
      end

      def unescape_link(href)
        CGI.unescape(href)
      end

      def replace_text_when_pattern_matches(node, index, pattern)
        return unless node.text =~ pattern

        content = node.to_html
        html = yield content

        replace_text_with_html(node, index, html) unless html == content
      end

      def replace_link_node_with_text(node, index)
        html = yield

        replace_text_with_html(node, index, html) unless html == node.text
      end

      def replace_link_node_with_href(node, index, link)
        html = yield

        replace_text_with_html(node, index, html) unless html == link
      end

      def text_node?(node)
        node.is_a?(Nokogiri::XML::Text)
      end

      def element_node?(node)
        node.is_a?(Nokogiri::XML::Element)
      end

      private

      def query
        @query ||= %Q{descendant-or-self::text()[not(#{ignore_ancestor_query})]
        | descendant-or-self::a[
          not(contains(concat(" ", @class, " "), " gfm ")) and not(@href = "")
        ]}
      end

      def replace_text_with_html(node, index, html)
        replace_and_update_new_nodes(node, index, html)
      end

      def replace_and_update_new_nodes(node, index, html)
        previous_node = node.previous
        next_node = node.next
        parent_node = node.parent
        # Unfortunately node.replace(html) returns re-parented nodes, not the actual replaced nodes in the doc
        # We need to find the actual nodes in the doc that were replaced
        node.replace(html)
        @new_nodes[index] = []

        # We replaced node with new nodes, so we find first new node. If previous_node is nil, we take first parent child
        new_node = previous_node ? previous_node.next : parent_node&.children&.first

        # We iterate from first to last replaced node and store replaced nodes in @new_nodes
        while new_node && new_node != next_node
          @new_nodes[index] << new_node.xpath(query)
          new_node = new_node.next
        end

        @new_nodes[index].flatten!
      end

      def only_path?
        context[:only_path]
      end

      def with_update_nodes
        @new_nodes = {}
        yield.tap { update_nodes! }
      end

      # Once Filter completes replacing nodes, we update nodes with @new_nodes
      def update_nodes!
        @new_nodes.sort_by { |index, _new_nodes| -index }.each do |index, new_nodes|
          nodes[index, 1] = new_nodes
        end
        result[:reference_filter_nodes] = nodes
      end
    end
  end
end