diff options
Diffstat (limited to 'lib/banzai/filter')
28 files changed, 601 insertions, 408 deletions
diff --git a/lib/banzai/filter/abstract_reference_filter.rb b/lib/banzai/filter/abstract_reference_filter.rb index 34c38913474..4815bafe238 100644 --- a/lib/banzai/filter/abstract_reference_filter.rb +++ b/lib/banzai/filter/abstract_reference_filter.rb @@ -11,15 +11,15 @@ module Banzai end def self.object_name - object_class.name.underscore + @object_name ||= object_class.name.underscore end def self.object_sym - object_name.to_sym + @object_sym ||= object_name.to_sym end - def self.data_reference - "data-#{object_name.dasherize}" + def self.object_class_title + @object_title ||= object_class.name.titleize end # Public: Find references in text (like `!123` for merge requests) @@ -41,10 +41,6 @@ module Banzai end end - def self.referenced_by(node) - { object_sym => LazyReference.new(object_class, node.attr(data_reference)) } - end - def object_class self.class.object_class end @@ -53,6 +49,10 @@ module Banzai self.class.object_sym end + def object_class_title + self.class.object_class_title + end + def references_in(*args, &block) self.class.references_in(*args, &block) end @@ -62,36 +62,81 @@ module Banzai # Example: project.merge_requests.find end + def find_object_cached(project, id) + if RequestStore.active? + cache = find_objects_cache[object_class][project.id] + + get_or_set_cache(cache, id) { find_object(project, id) } + else + find_object(project, id) + end + end + + def project_from_ref_cache(ref) + if RequestStore.active? + cache = project_refs_cache + + get_or_set_cache(cache, ref) { project_from_ref(ref) } + else + project_from_ref(ref) + end + end + def url_for_object(object, project) # Implement in child class # Example: project_merge_request_url end - def call - if object_class.reference_pattern - # `#123` - replace_text_nodes_matching(object_class.reference_pattern) do |content| - object_link_filter(content, object_class.reference_pattern) - end + def url_for_object_cached(object, project) + if RequestStore.active? + cache = url_for_object_cache[object_class][project.id] - # `[Issue](#123)`, which is turned into - # `<a href="#123">Issue</a>` - replace_link_nodes_with_href(object_class.reference_pattern) do |link, text| - object_link_filter(link, object_class.reference_pattern, link_text: text) - end + get_or_set_cache(cache, object) { url_for_object(object, project) } + else + url_for_object(object, project) end + end - if object_class.link_reference_pattern - # `http://gitlab.example.com/namespace/project/issues/123`, which is turned into - # `<a href="http://gitlab.example.com/namespace/project/issues/123">http://gitlab.example.com/namespace/project/issues/123</a>` - replace_link_nodes_with_text(object_class.link_reference_pattern) do |text| - object_link_filter(text, object_class.link_reference_pattern) - end + def call + return doc if project.nil? + + ref_pattern = object_class.reference_pattern + link_pattern = object_class.link_reference_pattern + + nodes.each do |node| + if text_node?(node) && ref_pattern + replace_text_when_pattern_matches(node, ref_pattern) do |content| + object_link_filter(content, ref_pattern) + end + + elsif element_node?(node) + yield_valid_link(node) do |link, text| + if ref_pattern && link =~ /\A#{ref_pattern}\z/ + replace_link_node_with_href(node, link) do + object_link_filter(link, ref_pattern, link_text: text) + end + + next + end - # `[Issue](http://gitlab.example.com/namespace/project/issues/123)`, which is turned into - # `<a href="http://gitlab.example.com/namespace/project/issues/123">Issue</a>` - replace_link_nodes_with_href(object_class.link_reference_pattern) do |link, text| - object_link_filter(link, object_class.link_reference_pattern, link_text: text) + next unless link_pattern + + if link == text && text =~ /\A#{link_pattern}/ + replace_link_node_with_text(node, link) do + object_link_filter(text, link_pattern) + end + + next + end + + if link =~ /\A#{link_pattern}\z/ + replace_link_node_with_href(node, link) do + object_link_filter(link, link_pattern, link_text: text) + end + + next + end + end end end @@ -109,9 +154,9 @@ module Banzai # have `gfm` and `gfm-OBJECT_NAME` class names attached for styling. def object_link_filter(text, pattern, link_text: nil) references_in(text, pattern) do |match, id, project_ref, matches| - project = project_from_ref(project_ref) + project = project_from_ref_cache(project_ref) - if project && object = find_object(project, id) + if project && object = find_object_cached(project, id) title = object_link_title(object) klass = reference_class(object_sym) @@ -121,8 +166,11 @@ module Banzai object_sym => object.id ) - url = matches[:url] if matches.names.include?("url") - url ||= url_for_object(object, project) + if matches.names.include?("url") && matches[:url] + url = matches[:url] + else + url = url_for_object_cached(object, project) + end text = link_text || object_link_text(object, matches) @@ -146,7 +194,7 @@ module Banzai end def object_link_title(object) - "#{object_class.name.titleize}: #{object.title}" + "#{object_class_title}: #{object.title}" end def object_link_text(object, matches) @@ -157,6 +205,83 @@ module Banzai text end + + # Returns a Hash containing all object references (e.g. issue IDs) per the + # project they belong to. + def references_per_project + @references_per_project ||= begin + refs = Hash.new { |hash, key| hash[key] = Set.new } + + regex = Regexp.union(object_class.reference_pattern, + object_class.link_reference_pattern) + + nodes.each do |node| + node.to_html.scan(regex) do + project = $~[:project] || current_project_path + + refs[project] << $~[object_sym] + end + end + + refs + end + end + + # Returns a Hash containing referenced projects grouped per their full + # path. + def projects_per_reference + @projects_per_reference ||= begin + hash = {} + refs = Set.new + + references_per_project.each do |project_ref, _| + refs << project_ref + end + + find_projects_for_paths(refs.to_a).each do |project| + hash[project.path_with_namespace] = project + end + + hash + end + end + + # Returns the projects for the given paths. + def find_projects_for_paths(paths) + Project.where_paths_in(paths).includes(:namespace) + end + + def current_project_path + @current_project_path ||= project.path_with_namespace + end + + private + + def project_refs_cache + RequestStore[:banzai_project_refs] ||= {} + end + + def find_objects_cache + RequestStore[:banzai_find_objects_cache] ||= Hash.new do |hash, key| + hash[key] = Hash.new { |h, k| h[k] = {} } + end + end + + def url_for_object_cache + RequestStore[:banzai_url_for_object] ||= Hash.new do |hash, key| + hash[key] = Hash.new { |h, k| h[k] = {} } + end + end + + def get_or_set_cache(cache, key) + if cache.key?(key) + cache[key] + else + value = yield + cache[key] = value if key.present? + value + end + end end end end diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index 856f56fb175..fac7dad3243 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -1,4 +1,3 @@ -require 'html/pipeline/filter' require 'uri' module Banzai diff --git a/lib/banzai/filter/commit_range_reference_filter.rb b/lib/banzai/filter/commit_range_reference_filter.rb index 470727ee312..bbb88c979cc 100644 --- a/lib/banzai/filter/commit_range_reference_filter.rb +++ b/lib/banzai/filter/commit_range_reference_filter.rb @@ -4,6 +4,8 @@ module Banzai # # This filter supports cross-project references. class CommitRangeReferenceFilter < AbstractReferenceFilter + self.reference_type = :commit_range + def self.object_class CommitRange end @@ -14,36 +16,20 @@ module Banzai end end - def self.referenced_by(node) - project = Project.find(node.attr("data-project")) rescue nil - return unless project - - id = node.attr("data-commit-range") - range = find_object(project, id) - - return unless range - - { commit_range: range } - end - def initialize(*args) super @commit_map = {} end - def self.find_object(project, id) + def find_object(project, id) range = CommitRange.new(id, project) range.valid_commits? ? range : nil end - def find_object(*args) - self.class.find_object(*args) - end - def url_for_object(range, project) - h = Gitlab::Application.routes.url_helpers + h = Gitlab::Routing.url_helpers h.namespace_project_compare_url(project.namespace, project, range.to_param.merge(only_path: context[:only_path])) end diff --git a/lib/banzai/filter/commit_reference_filter.rb b/lib/banzai/filter/commit_reference_filter.rb index 713a56ba949..2ce1816672b 100644 --- a/lib/banzai/filter/commit_reference_filter.rb +++ b/lib/banzai/filter/commit_reference_filter.rb @@ -4,6 +4,8 @@ module Banzai # # This filter supports cross-project references. class CommitReferenceFilter < AbstractReferenceFilter + self.reference_type = :commit + def self.object_class Commit end @@ -14,30 +16,14 @@ module Banzai end end - def self.referenced_by(node) - project = Project.find(node.attr("data-project")) rescue nil - return unless project - - id = node.attr("data-commit") - commit = find_object(project, id) - - return unless commit - - { commit: commit } - end - - def self.find_object(project, id) + def find_object(project, id) if project && project.valid_repo? project.commit(id) end end - def find_object(*args) - self.class.find_object(*args) - end - def url_for_object(commit, project) - h = Gitlab::Application.routes.url_helpers + h = Gitlab::Routing.url_helpers h.namespace_project_commit_url(project.namespace, project, commit, only_path: context[:only_path]) end diff --git a/lib/banzai/filter/emoji_filter.rb b/lib/banzai/filter/emoji_filter.rb index 207437ba7cf..d25de900674 100644 --- a/lib/banzai/filter/emoji_filter.rb +++ b/lib/banzai/filter/emoji_filter.rb @@ -1,7 +1,3 @@ -require 'action_controller' -require 'gitlab_emoji' -require 'html/pipeline/filter' - module Banzai module Filter # HTML filter that replaces :emoji: with images. diff --git a/lib/banzai/filter/external_issue_reference_filter.rb b/lib/banzai/filter/external_issue_reference_filter.rb index edc26386903..eaa702952cc 100644 --- a/lib/banzai/filter/external_issue_reference_filter.rb +++ b/lib/banzai/filter/external_issue_reference_filter.rb @@ -4,6 +4,8 @@ module Banzai # References are ignored if the project doesn't use an external issue # tracker. class ExternalIssueReferenceFilter < ReferenceFilter + self.reference_type = :external_issue + # Public: Find `JIRA-123` issue references in text # # ExternalIssueReferenceFilter.references_in(text) do |match, issue| @@ -21,29 +23,31 @@ module Banzai end end - def self.referenced_by(node) - project = Project.find(node.attr("data-project")) rescue nil - return unless project - - id = node.attr("data-external-issue") - external_issue = ExternalIssue.new(id, project) - - return unless external_issue - - { external_issue: external_issue } - end - def call # Early return if the project isn't using an external tracker - return doc if project.nil? || project.default_issues_tracker? + return doc if project.nil? || default_issues_tracker? - replace_text_nodes_matching(ExternalIssue.reference_pattern) do |content| - issue_link_filter(content) - end + ref_pattern = ExternalIssue.reference_pattern + ref_start_pattern = /\A#{ref_pattern}\z/ + + each_node do |node| + if text_node?(node) + replace_text_when_pattern_matches(node, ref_pattern) do |content| + issue_link_filter(content) + end - replace_link_nodes_with_href(ExternalIssue.reference_pattern) do |link, text| - issue_link_filter(link, link_text: text) + elsif element_node?(node) + yield_valid_link(node) do |link, text| + if link =~ ref_start_pattern + replace_link_node_with_href(node, link) do + issue_link_filter(link, link_text: text) + end + end + end + end end + + doc end # Replace `JIRA-123` issue references in text with links to the referenced @@ -76,6 +80,21 @@ module Banzai def url_for_issue(*args) IssuesHelper.url_for_issue(*args) end + + def default_issues_tracker? + if RequestStore.active? + default_issues_tracker_cache[project.id] ||= + project.default_issues_tracker? + else + project.default_issues_tracker? + end + end + + private + + def default_issues_tracker_cache + RequestStore[:banzai_default_issues_tracker_cache] ||= {} + end end end end diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index 8d368f3b9e7..0a29c547a4d 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -1,23 +1,12 @@ -require 'html/pipeline/filter' - module Banzai module Filter - # HTML Filter to add a `rel="nofollow"` attribute to external links - # + # HTML Filter to modify the attributes of external links class ExternalLinkFilter < HTML::Pipeline::Filter def call - doc.search('a').each do |node| - link = node.attr('href') - - next unless link - - # Skip non-HTTP(S) links - next unless link.start_with?('http') - - # Skip internal links - next if link.start_with?(internal_url) - - node.set_attribute('rel', 'nofollow') + # Skip non-HTTP(S) links and internal links + doc.xpath("descendant-or-self::a[starts-with(@href, 'http') and not(starts-with(@href, '#{internal_url}'))]").each do |node| + node.set_attribute('rel', 'nofollow noreferrer') + node.set_attribute('target', '_blank') end doc diff --git a/lib/banzai/filter/gollum_tags_filter.rb b/lib/banzai/filter/gollum_tags_filter.rb index f31f921903b..d08267a9d6c 100644 --- a/lib/banzai/filter/gollum_tags_filter.rb +++ b/lib/banzai/filter/gollum_tags_filter.rb @@ -1,6 +1,3 @@ -require 'banzai' -require 'html/pipeline/filter' - module Banzai module Filter # HTML Filter for parsing Gollum's tags in HTML. It's only parses the @@ -121,7 +118,7 @@ module Banzai end if path - content_tag(:img, nil, src: path) + content_tag(:img, nil, src: path, class: 'gfm') end end @@ -147,12 +144,18 @@ module Banzai # if it is not. def process_page_link_tag(parts) if parts.size == 1 - url = parts[0].strip + reference = parts[0].strip + else + name, reference = *parts.compact.map(&:strip) + end + + if url?(reference) + href = reference else - name, url = *parts.compact.map(&:strip) + href = ::File.join(project_wiki_base_path, reference) end - content_tag(:a, name || url, href: url) + content_tag(:a, name || reference, href: href, class: 'gfm') end def project_wiki diff --git a/lib/banzai/filter/image_link_filter.rb b/lib/banzai/filter/image_link_filter.rb new file mode 100644 index 00000000000..ccd106860bd --- /dev/null +++ b/lib/banzai/filter/image_link_filter.rb @@ -0,0 +1,27 @@ +module Banzai + module Filter + # HTML filter that wraps links around inline images. + class ImageLinkFilter < HTML::Pipeline::Filter + + # Find every image that isn't already wrapped in an `a` tag, create + # a new node (a link to the image source), copy the image as a child + # of the anchor, and then replace the img with the link-wrapped version. + def call + doc.xpath('descendant-or-self::img[not(ancestor::a)]').each do |img| + + link = doc.document.create_element( + 'a', + class: 'no-attachment-icon', + href: img['src'], + target: '_blank' + ) + + link.children = img.clone + img.replace(link) + end + + doc + end + end + end +end diff --git a/lib/banzai/filter/inline_diff_filter.rb b/lib/banzai/filter/inline_diff_filter.rb new file mode 100644 index 00000000000..beb21b19ab3 --- /dev/null +++ b/lib/banzai/filter/inline_diff_filter.rb @@ -0,0 +1,26 @@ +module Banzai + module Filter + class InlineDiffFilter < HTML::Pipeline::Filter + IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set + + def call + search_text_nodes(doc).each do |node| + next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS) + + content = node.to_html + html_content = inline_diff_filter(content) + + next if content == html_content + + node.replace(html_content) + end + doc + end + + def inline_diff_filter(text) + html_content = text.gsub(/(?:\[\-(.*?)\-\]|\{\-(.*?)\-\})/, '<span class="idiff left right deletion">\1\2</span>') + html_content.gsub(/(?:\[\+(.*?)\+\]|\{\+(.*?)\+\})/, '<span class="idiff left right addition">\1\2</span>') + end + end + end +end diff --git a/lib/banzai/filter/issue_reference_filter.rb b/lib/banzai/filter/issue_reference_filter.rb index 9f08aa36e8b..2614261f9eb 100644 --- a/lib/banzai/filter/issue_reference_filter.rb +++ b/lib/banzai/filter/issue_reference_filter.rb @@ -5,17 +5,46 @@ module Banzai # # This filter supports cross-project references. class IssueReferenceFilter < AbstractReferenceFilter + self.reference_type = :issue + def self.object_class Issue end - def find_object(project, id) - project.get_issue(id) + def find_object(project, iid) + issues_per_project[project][iid] end def url_for_object(issue, project) IssuesHelper.url_for_issue(issue.iid, project, only_path: context[:only_path]) end + + def project_from_ref(ref) + projects_per_reference[ref || current_project_path] + end + + # Returns a Hash containing the issues per Project instance. + def issues_per_project + @issues_per_project ||= begin + hash = Hash.new { |h, k| h[k] = {} } + + projects_per_reference.each do |path, project| + issue_ids = references_per_project[path] + + next unless project.default_issues_tracker? + + project.issues.where(iid: issue_ids.to_a).each do |issue| + hash[project][issue.iid] = issue + end + end + + hash + end + end + + def find_projects_for_paths(paths) + super(paths).includes(:gitlab_issue_tracker_service) + end end end end diff --git a/lib/banzai/filter/label_reference_filter.rb b/lib/banzai/filter/label_reference_filter.rb index 8147e5ed3c7..e4d3f87d0aa 100644 --- a/lib/banzai/filter/label_reference_filter.rb +++ b/lib/banzai/filter/label_reference_filter.rb @@ -2,6 +2,8 @@ module Banzai module Filter # HTML filter that replaces label references with links. class LabelReferenceFilter < AbstractReferenceFilter + self.reference_type = :label + def self.object_class Label end @@ -18,9 +20,7 @@ module Banzai def references_in(text, pattern = Label.reference_pattern) text.gsub(pattern) do |match| - project = project_from_ref($~[:project]) - params = label_params($~[:label_id].to_i, $~[:label_name]) - label = project.labels.find_by(params) + label = find_label($~[:project], $~[:label_id], $~[:label_name]) if label yield match, label.id, $~[:project], $~ @@ -30,18 +30,12 @@ module Banzai end end - def url_for_object(label, project) - h = Gitlab::Application.routes.url_helpers - h.namespace_project_issues_url(project.namespace, project, label_name: label.name, - only_path: context[:only_path]) - end + def find_label(project_ref, label_id, label_name) + project = project_from_ref(project_ref) + return unless project - def object_link_text(object, matches) - if context[:project] == object.project - LabelsHelper.render_colored_label(object) - else - LabelsHelper.render_colored_cross_project_label(object) - end + label_params = label_params(label_id, label_name) + project.labels.find_by(label_params) end # Parameters to pass to `Label.find_by` based on the given arguments @@ -55,7 +49,21 @@ module Banzai if name { name: name.tr('"', '') } else - { id: id } + { id: id.to_i } + end + end + + def url_for_object(label, project) + h = Gitlab::Routing.url_helpers + h.namespace_project_issues_url(project.namespace, project, label_name: label.name, + only_path: context[:only_path]) + end + + def object_link_text(object, matches) + if context[:project] == object.project + LabelsHelper.render_colored_label(object) + else + LabelsHelper.render_colored_cross_project_label(object) end end end diff --git a/lib/banzai/filter/markdown_filter.rb b/lib/banzai/filter/markdown_filter.rb index 0659fed1419..9b209533a89 100644 --- a/lib/banzai/filter/markdown_filter.rb +++ b/lib/banzai/filter/markdown_filter.rb @@ -1,5 +1,3 @@ -require 'html/pipeline/filter' - module Banzai module Filter class MarkdownFilter < HTML::Pipeline::TextFilter diff --git a/lib/banzai/filter/merge_request_reference_filter.rb b/lib/banzai/filter/merge_request_reference_filter.rb index 57c71708992..ac5216d9cfb 100644 --- a/lib/banzai/filter/merge_request_reference_filter.rb +++ b/lib/banzai/filter/merge_request_reference_filter.rb @@ -5,6 +5,8 @@ module Banzai # # This filter supports cross-project references. class MergeRequestReferenceFilter < AbstractReferenceFilter + self.reference_type = :merge_request + def self.object_class MergeRequest end @@ -14,7 +16,7 @@ module Banzai end def url_for_object(mr, project) - h = Gitlab::Application.routes.url_helpers + h = Gitlab::Routing.url_helpers h.namespace_project_merge_request_url(project.namespace, project, mr, only_path: context[:only_path]) end diff --git a/lib/banzai/filter/milestone_reference_filter.rb b/lib/banzai/filter/milestone_reference_filter.rb index e88b27c1fae..ca686c87d97 100644 --- a/lib/banzai/filter/milestone_reference_filter.rb +++ b/lib/banzai/filter/milestone_reference_filter.rb @@ -1,9 +1,9 @@ -require 'banzai' - module Banzai module Filter # HTML filter that replaces milestone references with links. class MilestoneReferenceFilter < AbstractReferenceFilter + self.reference_type = :milestone + def self.object_class Milestone end @@ -12,11 +12,53 @@ module Banzai project.milestones.find_by(iid: id) end - def url_for_object(issue, project) - h = Gitlab::Application.routes.url_helpers + def references_in(text, pattern = Milestone.reference_pattern) + # We'll handle here the references that follow the `reference_pattern`. + # Other patterns (for example, the link pattern) are handled by the + # default implementation. + return super(text, pattern) if pattern != Milestone.reference_pattern + + text.gsub(pattern) do |match| + milestone = find_milestone($~[:project], $~[:milestone_iid], $~[:milestone_name]) + + if milestone + yield match, milestone.iid, $~[:project], $~ + else + match + end + end + end + + def find_milestone(project_ref, milestone_id, milestone_name) + project = project_from_ref(project_ref) + return unless project + + milestone_params = milestone_params(milestone_id, milestone_name) + project.milestones.find_by(milestone_params) + end + + def milestone_params(iid, name) + if name + { name: name.tr('"', '') } + else + { iid: iid.to_i } + end + end + + def url_for_object(milestone, project) + h = Gitlab::Routing.url_helpers h.namespace_project_milestone_url(project.namespace, project, milestone, only_path: context[:only_path]) end + + def object_link_text(object, matches) + if context[:project] == object.project + super + else + "#{escape_once(super)} <i>in #{escape_once(object.project.path_with_namespace)}</i>". + html_safe + end + end end end end diff --git a/lib/banzai/filter/redactor_filter.rb b/lib/banzai/filter/redactor_filter.rb index 7141ed7c9bd..c753a84a20d 100644 --- a/lib/banzai/filter/redactor_filter.rb +++ b/lib/banzai/filter/redactor_filter.rb @@ -1,5 +1,3 @@ -require 'html/pipeline/filter' - module Banzai module Filter # HTML filter that removes references to records that the current user does @@ -9,8 +7,11 @@ module Banzai # class RedactorFilter < HTML::Pipeline::Filter def call - Querying.css(doc, 'a.gfm').each do |node| - unless user_can_see_reference?(node) + nodes = Querying.css(doc, 'a.gfm[data-reference-type]') + visible = nodes_visible_to_user(nodes) + + nodes.each do |node| + unless visible.include?(node) # The reference should be replaced by the original text, # which is not always the same as the rendered text. text = node.attr('data-original') || node.text @@ -23,20 +24,30 @@ module Banzai private - def user_can_see_reference?(node) - if node.has_attribute?('data-reference-filter') - reference_type = node.attr('data-reference-filter') - reference_filter = Banzai::Filter.const_get(reference_type) + def nodes_visible_to_user(nodes) + per_type = Hash.new { |h, k| h[k] = [] } + visible = Set.new + + nodes.each do |node| + per_type[node.attr('data-reference-type')] << node + end + + per_type.each do |type, nodes| + parser = Banzai::ReferenceParser[type].new(project, current_user) - reference_filter.user_can_see_reference?(current_user, node, context) - else - true + visible.merge(parser.nodes_visible_to_user(current_user, nodes)) end + + visible end def current_user context[:current_user] end + + def project + context[:project] + end end end end diff --git a/lib/banzai/filter/reference_filter.rb b/lib/banzai/filter/reference_filter.rb index 3637b1bac94..2d6f34c9cd8 100644 --- a/lib/banzai/filter/reference_filter.rb +++ b/lib/banzai/filter/reference_filter.rb @@ -1,6 +1,3 @@ -require 'active_support/core_ext/string/output_safety' -require 'html/pipeline/filter' - module Banzai module Filter # Base class for GitLab Flavored Markdown reference filters. @@ -11,24 +8,8 @@ module Banzai # :project (required) - Current project, ignored if reference is cross-project. # :only_path - Generate path-only links. class ReferenceFilter < HTML::Pipeline::Filter - def self.user_can_see_reference?(user, node, context) - if node.has_attribute?('data-project') - project_id = node.attr('data-project').to_i - return true if project_id == context[:project].try(:id) - - project = Project.find(project_id) rescue nil - Ability.abilities.allowed?(user, :read_project, project) - else - true - end - end - - def self.user_can_reference?(user, node, context) - true - end - - def self.referenced_by(node) - raise NotImplementedError, "#{self} does not implement #{__method__}" + class << self + attr_accessor :reference_type end # Returns a data attribute String to attach to a reference link @@ -46,7 +27,10 @@ module Banzai # # Returns a String def data_attribute(attributes = {}) - attributes[:reference_filter] = self.class.name.demodulize + attributes = attributes.reject { |_, v| v.nil? } + + attributes[:reference_type] = self.class.reference_type + attributes.delete(:original) if context[:no_original_data] attributes.map { |key, value| %Q(data-#{key.to_s.dasherize}="#{escape_once(value)}") }.join(" ") end @@ -54,18 +38,13 @@ module Banzai html.html_safe? ? html : ERB::Util.html_escape_once(html) end - def ignore_parents - @ignore_parents ||= begin - # Don't look for references in text nodes that are children of these - # elements. + def ignore_ancestor_query + @ignore_ancestor_query ||= begin parents = %w(pre code a style) parents << 'blockquote' if context[:ignore_blockquotes] - parents.to_set - end - end - def ignored_ancestry?(node) - has_ancestor?(node, ignore_parents) + parents.map { |n| "ancestor::#{n}" }.join(' or ') + end end def project @@ -76,119 +55,73 @@ module Banzai "gfm gfm-#{type}" end - # Iterate through the document's text nodes, yielding the current node's - # content if: - # - # * The `project` context value is present AND - # * The node's content matches `pattern` AND - # * The node is not an ancestor of an ignored node type - # - # pattern - Regex pattern against which to match the node's content - # - # Yields the current node's String contents. The result of the block will - # replace the node's existing content and update the current document. + # Ensure that a :project key exists in context # - # Returns the updated Nokogiri::HTML::DocumentFragment object. - def replace_text_nodes_matching(pattern) - return doc if project.nil? - - search_text_nodes(doc).each do |node| - next if ignored_ancestry?(node) - next unless node.text =~ pattern - - content = node.to_html - - html = yield content - - next if html == content - - node.replace(html) - end - - doc + # Note that while the key might exist, its value could be nil! + def validate + needs :project end - # Iterate through the document's link nodes, yielding the current node's - # content if: - # - # * The `project` context value is present AND - # * The node's content matches `pattern` - # - # pattern - Regex pattern against which to match the node's content + # Iterates over all <a> and text() nodes in a document. # - # Yields the current node's String contents. The result of the block will - # replace the node and update the current document. - # - # Returns the updated Nokogiri::HTML::DocumentFragment object. - def replace_link_nodes_with_text(pattern) - return doc if project.nil? - - doc.xpath('descendant-or-self::a').each do |node| - klass = node.attr('class') - next if klass && klass.include?('gfm') - - link = node.attr('href') - text = node.text + # Nodes are skipped whenever their ancestor is one of the nodes returned + # by `ignore_ancestor_query`. Link tags are not processed if they have a + # "gfm" class or the "href" attribute is empty. + def each_node + return to_enum(__method__) unless block_given? - next unless link && text + query = %Q{descendant-or-self::text()[not(#{ignore_ancestor_query})] + | descendant-or-self::a[ + not(contains(concat(" ", @class, " "), " gfm ")) and not(@href = "") + ]} - link = CGI.unescape(link) - next unless link.force_encoding('UTF-8').valid_encoding? - # Ignore ending punctionation like periods or commas - next unless link == text && text =~ /\A#{pattern}/ + doc.xpath(query).each do |node| + yield node + end + end - html = yield text + # Returns an Array containing all HTML nodes. + def nodes + @nodes ||= each_node.to_a + end - next if html == text + # Yields the link's URL and text whenever the node is a valid <a> tag. + def yield_valid_link(node) + link = CGI.unescape(node.attr('href').to_s) + text = node.text - node.replace(html) - end + return unless link.force_encoding('UTF-8').valid_encoding? - doc + yield link, text end - # Iterate through the document's link nodes, yielding the current node's - # content if: - # - # * The `project` context value is present AND - # * The node's HREF matches `pattern` - # - # pattern - Regex pattern against which to match the node's HREF - # - # Yields the current node's String HREF and String content. - # The result of the block will replace the node and update the current document. - # - # Returns the updated Nokogiri::HTML::DocumentFragment object. - def replace_link_nodes_with_href(pattern) - return doc if project.nil? + def replace_text_when_pattern_matches(node, pattern) + return unless node.text =~ pattern - doc.xpath('descendant-or-self::a').each do |node| - klass = node.attr('class') - next if klass && klass.include?('gfm') + content = node.to_html + html = yield content - link = node.attr('href') - text = node.text + node.replace(html) unless content == html + end - next unless link && text - link = CGI.unescape(link) - next unless link.force_encoding('UTF-8').valid_encoding? - next unless link && link =~ /\A#{pattern}\z/ + def replace_link_node_with_text(node, link) + html = yield - html = yield link, text + node.replace(html) unless html == node.text + end - next if html == link + def replace_link_node_with_href(node, link) + html = yield - node.replace(html) - end + node.replace(html) unless html == link + end - doc + def text_node?(node) + node.is_a?(Nokogiri::XML::Text) end - # Ensure that a :project key exists in context - # - # Note that while the key might exist, its value could be nil! - def validate - needs :project + def element_node?(node) + node.is_a?(Nokogiri::XML::Element) end end end diff --git a/lib/banzai/filter/reference_gatherer_filter.rb b/lib/banzai/filter/reference_gatherer_filter.rb deleted file mode 100644 index 86d484feb90..00000000000 --- a/lib/banzai/filter/reference_gatherer_filter.rb +++ /dev/null @@ -1,67 +0,0 @@ -require 'html/pipeline/filter' - -module Banzai - module Filter - # HTML filter that gathers all referenced records that the current user has - # permission to view. - # - # Expected to be run in its own post-processing pipeline. - # - class ReferenceGathererFilter < HTML::Pipeline::Filter - def initialize(*) - super - - result[:references] ||= Hash.new { |hash, type| hash[type] = [] } - end - - def call - Querying.css(doc, 'a.gfm').each do |node| - gather_references(node) - end - - load_lazy_references unless ReferenceExtractor.lazy? - - doc - end - - private - - def gather_references(node) - return unless node.has_attribute?('data-reference-filter') - - reference_type = node.attr('data-reference-filter') - reference_filter = Banzai::Filter.const_get(reference_type) - - return if context[:reference_filter] && reference_filter != context[:reference_filter] - - return if author && !reference_filter.user_can_reference?(author, node, context) - - return unless reference_filter.user_can_see_reference?(current_user, node, context) - - references = reference_filter.referenced_by(node) - return unless references - - references.each do |type, values| - Array.wrap(values).each do |value| - result[:references][type] << value - end - end - end - - def load_lazy_references - refs = result[:references] - refs.each do |type, values| - refs[type] = ReferenceExtractor.lazily(values) - end - end - - def current_user - context[:current_user] - end - - def author - context[:author] - end - end - end -end diff --git a/lib/banzai/filter/relative_link_filter.rb b/lib/banzai/filter/relative_link_filter.rb index 41380627d39..ea21c7b041c 100644 --- a/lib/banzai/filter/relative_link_filter.rb +++ b/lib/banzai/filter/relative_link_filter.rb @@ -1,4 +1,3 @@ -require 'html/pipeline/filter' require 'uri' module Banzai diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index e8011519608..ca80aac5a08 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -1,6 +1,3 @@ -require 'html/pipeline/filter' -require 'html/pipeline/sanitization_filter' - module Banzai module Filter # Sanitize HTML @@ -66,7 +63,7 @@ module Banzai begin uri = Addressable::URI.parse(node['href']) - uri.scheme.strip! if uri.scheme + uri.scheme = uri.scheme.strip.downcase if uri.scheme node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(uri.scheme) rescue Addressable::URI::InvalidURIError diff --git a/lib/banzai/filter/snippet_reference_filter.rb b/lib/banzai/filter/snippet_reference_filter.rb index c870a42f741..212a0bbf2a0 100644 --- a/lib/banzai/filter/snippet_reference_filter.rb +++ b/lib/banzai/filter/snippet_reference_filter.rb @@ -5,6 +5,8 @@ module Banzai # # This filter supports cross-project references. class SnippetReferenceFilter < AbstractReferenceFilter + self.reference_type = :snippet + def self.object_class Snippet end @@ -14,7 +16,7 @@ module Banzai end def url_for_object(snippet, project) - h = Gitlab::Application.routes.url_helpers + h = Gitlab::Routing.url_helpers h.namespace_project_snippet_url(project.namespace, project, snippet, only_path: context[:only_path]) end diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb index 8c5855e5ffc..62a79c62e20 100644 --- a/lib/banzai/filter/syntax_highlight_filter.rb +++ b/lib/banzai/filter/syntax_highlight_filter.rb @@ -1,4 +1,3 @@ -require 'html/pipeline/filter' require 'rouge/plugins/redcarpet' module Banzai diff --git a/lib/banzai/filter/table_of_contents_filter.rb b/lib/banzai/filter/table_of_contents_filter.rb index 4056dcd6d64..a4eda6fdf76 100644 --- a/lib/banzai/filter/table_of_contents_filter.rb +++ b/lib/banzai/filter/table_of_contents_filter.rb @@ -1,5 +1,3 @@ -require 'html/pipeline/filter' - module Banzai module Filter # HTML filter that adds an anchor child element to all Headers in a diff --git a/lib/banzai/filter/upload_link_filter.rb b/lib/banzai/filter/upload_link_filter.rb index f642aee0967..45bb66dc99f 100644 --- a/lib/banzai/filter/upload_link_filter.rb +++ b/lib/banzai/filter/upload_link_filter.rb @@ -1,4 +1,3 @@ -require 'html/pipeline/filter' require 'uri' module Banzai @@ -9,11 +8,13 @@ module Banzai # class UploadLinkFilter < HTML::Pipeline::Filter def call - doc.search('a').each do |el| + return doc unless project + + doc.xpath('descendant-or-self::a[starts-with(@href, "/uploads/")]').each do |el| process_link_attr el.attribute('href') end - doc.search('img').each do |el| + doc.xpath('descendant-or-self::img[starts-with(@src, "/uploads/")]').each do |el| process_link_attr el.attribute('src') end @@ -23,16 +24,15 @@ module Banzai protected def process_link_attr(html_attr) - return if html_attr.blank? - - uri = html_attr.value - if uri.starts_with?("/uploads/") - html_attr.value = build_url(uri).to_s - end + html_attr.value = build_url(html_attr.value).to_s end def build_url(uri) - File.join(Gitlab.config.gitlab.url, context[:project].path_with_namespace, uri) + File.join(Gitlab.config.gitlab.url, project.path_with_namespace, uri) + end + + def project + context[:project] end # Ensure that a :project key exists in context diff --git a/lib/banzai/filter/user_reference_filter.rb b/lib/banzai/filter/user_reference_filter.rb index 24f16f8b547..5b0a6d8541b 100644 --- a/lib/banzai/filter/user_reference_filter.rb +++ b/lib/banzai/filter/user_reference_filter.rb @@ -4,6 +4,8 @@ module Banzai # # A special `@all` reference is also supported. class UserReferenceFilter < ReferenceFilter + self.reference_type = :user + # Public: Find `@user` user references in text # # UserReferenceFilter.references_in(text) do |match, username| @@ -21,51 +23,29 @@ module Banzai end end - def self.referenced_by(node) - if node.has_attribute?('data-group') - group = Group.find(node.attr('data-group')) rescue nil - return unless group - - { user: group.users } - elsif node.has_attribute?('data-user') - { user: LazyReference.new(User, node.attr('data-user')) } - elsif node.has_attribute?('data-project') - project = Project.find(node.attr('data-project')) rescue nil - return unless project - - { user: project.team.members.flatten } - end - end - - def self.user_can_see_reference?(user, node, context) - if node.has_attribute?('data-group') - group = Group.find(node.attr('data-group')) rescue nil - Ability.abilities.allowed?(user, :read_group, group) - else - super - end - end - - def self.user_can_reference?(user, node, context) - # Only team members can reference `@all` - if node.has_attribute?('data-project') - project = Project.find(node.attr('data-project')) rescue nil - return false unless project - - user && project.team.member?(user) - else - super - end - end - def call - replace_text_nodes_matching(User.reference_pattern) do |content| - user_link_filter(content) + return doc if project.nil? + + ref_pattern = User.reference_pattern + ref_pattern_start = /\A#{ref_pattern}\z/ + + nodes.each do |node| + if text_node?(node) + replace_text_when_pattern_matches(node, ref_pattern) do |content| + user_link_filter(content) + end + elsif element_node?(node) + yield_valid_link(node) do |link, text| + if link =~ ref_pattern_start + replace_link_node_with_href(node, link) do + user_link_filter(link, link_text: text) + end + end + end + end end - replace_link_nodes_with_href(User.reference_pattern) do |link, text| - user_link_filter(link, link_text: text) - end + doc end # Replace `@user` user references in text with links to the referenced @@ -79,7 +59,7 @@ module Banzai self.class.references_in(text) do |match, username| if username == 'all' link_to_all(link_text: link_text) - elsif namespace = Namespace.find_by(path: username) + elsif namespace = namespaces[username] link_to_namespace(namespace, link_text: link_text) || match else match @@ -87,10 +67,35 @@ module Banzai end end + # Returns a Hash containing all Namespace objects for the username + # references in the current document. + # + # The keys of this Hash are the namespace paths, the values the + # corresponding Namespace objects. + def namespaces + @namespaces ||= + Namespace.where(path: usernames).each_with_object({}) do |row, hash| + hash[row.path] = row + end + end + + # Returns all usernames referenced in the current document. + def usernames + refs = Set.new + + nodes.each do |node| + node.to_html.scan(User.reference_pattern) do + refs << $~[:user] + end + end + + refs.to_a + end + private def urls - Gitlab::Application.routes.url_helpers + Gitlab::Routing.url_helpers end def link_class @@ -99,9 +104,12 @@ module Banzai def link_to_all(link_text: nil) project = context[:project] + author = context[:author] + url = urls.namespace_project_url(project.namespace, project, only_path: context[:only_path]) - data = data_attribute(project: project.id) + + data = data_attribute(project: project.id, author: author.try(:id)) text = link_text || User.reference_prefix + 'all' link_tag(url, data, text) diff --git a/lib/banzai/filter/wiki_link_filter.rb b/lib/banzai/filter/wiki_link_filter.rb new file mode 100644 index 00000000000..37a2779d453 --- /dev/null +++ b/lib/banzai/filter/wiki_link_filter.rb @@ -0,0 +1,41 @@ +require 'uri' + +module Banzai + module Filter + # HTML filter that "fixes" links to pages/files in a wiki. + # Rewrite rules are documented in the `WikiPipeline` spec. + # + # Context options: + # :project_wiki + class WikiLinkFilter < HTML::Pipeline::Filter + + def call + return doc unless project_wiki? + + doc.search('a:not(.gfm)').each do |el| + process_link_attr el.attribute('href') + end + + doc + end + + protected + + def project_wiki? + !context[:project_wiki].nil? + end + + def process_link_attr(html_attr) + return if html_attr.blank? + + html_attr.value = apply_rewrite_rules(html_attr.value) + rescue URI::Error + # noop + end + + def apply_rewrite_rules(link_string) + Rewriter.new(link_string, wiki: context[:project_wiki], slug: context[:page_slug]).apply_rules + end + end + end +end diff --git a/lib/banzai/filter/wiki_link_filter/rewriter.rb b/lib/banzai/filter/wiki_link_filter/rewriter.rb new file mode 100644 index 00000000000..2e2c8da311e --- /dev/null +++ b/lib/banzai/filter/wiki_link_filter/rewriter.rb @@ -0,0 +1,40 @@ +module Banzai + module Filter + class WikiLinkFilter < HTML::Pipeline::Filter + class Rewriter + def initialize(link_string, wiki:, slug:) + @uri = Addressable::URI.parse(link_string) + @wiki_base_path = wiki && wiki.wiki_base_path + @slug = slug + end + + def apply_rules + apply_file_link_rules! + apply_hierarchical_link_rules! + apply_relative_link_rules! + @uri.to_s + end + + private + + # Of the form 'file.md' + def apply_file_link_rules! + @uri = Addressable::URI.join(@slug, @uri) if @uri.extname.present? + end + + # Of the form `./link`, `../link`, or similar + def apply_hierarchical_link_rules! + @uri = Addressable::URI.join(@slug, @uri) if @uri.to_s[0] == '.' + end + + # Any link _not_ of the form `http://example.com/` + def apply_relative_link_rules! + if @uri.relative? && @uri.path.present? + link = ::File.join(@wiki_base_path, @uri.path) + @uri = Addressable::URI.parse(link) + end + end + end + end + end +end diff --git a/lib/banzai/filter/yaml_front_matter_filter.rb b/lib/banzai/filter/yaml_front_matter_filter.rb index e4e2f3f228d..58e3e81209e 100644 --- a/lib/banzai/filter/yaml_front_matter_filter.rb +++ b/lib/banzai/filter/yaml_front_matter_filter.rb @@ -1,6 +1,3 @@ -require 'html/pipeline/filter' -require 'yaml' - module Banzai module Filter class YamlFrontMatterFilter < HTML::Pipeline::Filter |