diff options
Diffstat (limited to 'lib/banzai/filter/references/reference_filter.rb')
-rw-r--r-- | lib/banzai/filter/references/reference_filter.rb | 217 |
1 files changed, 217 insertions, 0 deletions
diff --git a/lib/banzai/filter/references/reference_filter.rb b/lib/banzai/filter/references/reference_filter.rb new file mode 100644 index 00000000000..dd15c43f5d8 --- /dev/null +++ b/lib/banzai/filter/references/reference_filter.rb @@ -0,0 +1,217 @@ +# frozen_string_literal: true + +# Generated HTML is transformed back to GFM by app/assets/javascripts/behaviors/markdown/nodes/reference.js +module Banzai + module Filter + module References + # Base class for GitLab Flavored Markdown reference filters. + # + # References within <pre>, <code>, <a>, and <style> elements are ignored. + # + # Context options: + # :project (required) - Current project, ignored if reference is cross-project. + # :only_path - Generate path-only links. + class ReferenceFilter < HTML::Pipeline::Filter + include RequestStoreReferenceCache + include OutputSafety + + class << self + attr_accessor :reference_type + + def call(doc, context = nil, result = nil) + new(doc, context, result).call_and_update_nodes + end + end + + def initialize(doc, context = nil, result = nil) + super + + @new_nodes = {} + @nodes = self.result[:reference_filter_nodes] + end + + def call_and_update_nodes + with_update_nodes { call } + end + + # Returns a data attribute String to attach to a reference link + # + # attributes - Hash, where the key becomes the data attribute name and the + # value is the data attribute value + # + # Examples: + # + # data_attribute(project: 1, issue: 2) + # # => "data-reference-type=\"SomeReferenceFilter\" data-project=\"1\" data-issue=\"2\"" + # + # data_attribute(project: 3, merge_request: 4) + # # => "data-reference-type=\"SomeReferenceFilter\" data-project=\"3\" data-merge-request=\"4\"" + # + # Returns a String + def data_attribute(attributes = {}) + attributes = attributes.reject { |_, v| v.nil? } + + attributes[:reference_type] ||= self.class.reference_type + attributes[:container] ||= 'body' + attributes[:placement] ||= 'top' + attributes.delete(:original) if context[:no_original_data] + attributes.map do |key, value| + %Q(data-#{key.to_s.dasherize}="#{escape_once(value)}") + end.join(' ') + end + + def ignore_ancestor_query + @ignore_ancestor_query ||= begin + parents = %w(pre code a style) + parents << 'blockquote' if context[:ignore_blockquotes] + + parents.map { |n| "ancestor::#{n}" }.join(' or ') + end + end + + def project + context[:project] + end + + def group + context[:group] + end + + def user + context[:user] + end + + def skip_project_check? + context[:skip_project_check] + end + + def reference_class(type, tooltip: true) + gfm_klass = "gfm gfm-#{type}" + + return gfm_klass unless tooltip + + "#{gfm_klass} has-tooltip" + end + + # Ensure that a :project key exists in context + # + # Note that while the key might exist, its value could be nil! + def validate + needs :project unless skip_project_check? + end + + # Iterates over all <a> and text() nodes in a document. + # + # Nodes are skipped whenever their ancestor is one of the nodes returned + # by `ignore_ancestor_query`. Link tags are not processed if they have a + # "gfm" class or the "href" attribute is empty. + def each_node + return to_enum(__method__) unless block_given? + + doc.xpath(query).each do |node| + yield node + end + end + + # Returns an Array containing all HTML nodes. + def nodes + @nodes ||= each_node.to_a + end + + # Yields the link's URL and inner HTML whenever the node is a valid <a> tag. + def yield_valid_link(node) + link = unescape_link(node.attr('href').to_s) + inner_html = node.inner_html + + return unless link.force_encoding('UTF-8').valid_encoding? + + yield link, inner_html + end + + def unescape_link(href) + CGI.unescape(href) + end + + def replace_text_when_pattern_matches(node, index, pattern) + return unless node.text =~ pattern + + content = node.to_html + html = yield content + + replace_text_with_html(node, index, html) unless html == content + end + + def replace_link_node_with_text(node, index) + html = yield + + replace_text_with_html(node, index, html) unless html == node.text + end + + def replace_link_node_with_href(node, index, link) + html = yield + + replace_text_with_html(node, index, html) unless html == link + end + + def text_node?(node) + node.is_a?(Nokogiri::XML::Text) + end + + def element_node?(node) + node.is_a?(Nokogiri::XML::Element) + end + + private + + def query + @query ||= %Q{descendant-or-self::text()[not(#{ignore_ancestor_query})] + | descendant-or-self::a[ + not(contains(concat(" ", @class, " "), " gfm ")) and not(@href = "") + ]} + end + + def replace_text_with_html(node, index, html) + replace_and_update_new_nodes(node, index, html) + end + + def replace_and_update_new_nodes(node, index, html) + previous_node = node.previous + next_node = node.next + parent_node = node.parent + # Unfortunately node.replace(html) returns re-parented nodes, not the actual replaced nodes in the doc + # We need to find the actual nodes in the doc that were replaced + node.replace(html) + @new_nodes[index] = [] + + # We replaced node with new nodes, so we find first new node. If previous_node is nil, we take first parent child + new_node = previous_node ? previous_node.next : parent_node&.children&.first + + # We iterate from first to last replaced node and store replaced nodes in @new_nodes + while new_node && new_node != next_node + @new_nodes[index] << new_node.xpath(query) + new_node = new_node.next + end + + @new_nodes[index].flatten! + end + + def only_path? + context[:only_path] + end + + def with_update_nodes + @new_nodes = {} + yield.tap { update_nodes! } + end + + # Once Filter completes replacing nodes, we update nodes with @new_nodes + def update_nodes! + @new_nodes.sort_by { |index, _new_nodes| -index }.each do |index, new_nodes| + nodes[index, 1] = new_nodes + end + result[:reference_filter_nodes] = nodes + end + end + end + end +end |