summaryrefslogtreecommitdiff
path: root/lib/banzai/reference_parser
diff options
context:
space:
mode:
authorYorick Peterse <yorickpeterse@gmail.com>2016-05-26 13:16:43 +0200
committerYorick Peterse <yorickpeterse@gmail.com>2016-05-26 17:14:00 +0200
commit86166d28029d5fcc729f7b7f5a41635c2e783a9e (patch)
treed4e9354a4daafc7b298bc7a73980166e41d55bf7 /lib/banzai/reference_parser
parent94d5416db6415b06706204fb4a4df0100bcab7be (diff)
downloadgitlab-ce-86166d28029d5fcc729f7b7f5a41635c2e783a9e.tar.gz
Split Markdown rendering & reference gathering
This splits the Markdown rendering and reference extraction phases into two distinct code bases. The reference extraction phase no longer relies on the html-pipeline Gem (and any related code) and allows references to be extracted from multiple HTML nodes in a single pass. This means that if you want to extract user references from 200 comments, you no longer need to run 200 × N queries; instead, only a handful of queries may be needed.
Diffstat (limited to 'lib/banzai/reference_parser')
-rw-r--r--lib/banzai/reference_parser/base_parser.rb204
-rw-r--r--lib/banzai/reference_parser/commit_parser.rb34
-rw-r--r--lib/banzai/reference_parser/commit_range_parser.rb38
-rw-r--r--lib/banzai/reference_parser/external_issue_parser.rb25
-rw-r--r--lib/banzai/reference_parser/issue_parser.rb40
-rw-r--r--lib/banzai/reference_parser/label_parser.rb11
-rw-r--r--lib/banzai/reference_parser/merge_request_parser.rb11
-rw-r--r--lib/banzai/reference_parser/milestone_parser.rb11
-rw-r--r--lib/banzai/reference_parser/snippet_parser.rb11
-rw-r--r--lib/banzai/reference_parser/user_parser.rb92
10 files changed, 477 insertions, 0 deletions
diff --git a/lib/banzai/reference_parser/base_parser.rb b/lib/banzai/reference_parser/base_parser.rb
new file mode 100644
index 00000000000..3d7b9c4a024
--- /dev/null
+++ b/lib/banzai/reference_parser/base_parser.rb
@@ -0,0 +1,204 @@
+module Banzai
+ module ReferenceParser
+ # Base class for reference parsing classes.
+ #
+ # Each parser should also specify its reference type by calling
+ # `self.reference_type = ...` in the body of the class. The value of this
+ # method should be a symbol such as `:issue` or `:merge_request`. For
+ # example:
+ #
+ # class IssueParser < BaseParser
+ # self.reference_type = :issue
+ # end
+ #
+ # The reference type is used to determine what nodes to pass to the
+ # `referenced_by` method.
+ #
+ # Parser classes should either implement the instance method
+ # `references_relation` or overwrite `referenced_by`. The
+ # `references_relation` method is supposed to return an
+ # ActiveRecord::Relation used as a base relation for retrieving the objects
+ # referenced in a set of HTML nodes.
+ #
+ # Each class can implement two additional methods:
+ #
+ # * `nodes_user_can_reference`: returns an Array of nodes the given user can
+ # refer to.
+ # * `nodes_visible_to_user`: returns an Array of nodes that are visible to
+ # the given user.
+ #
+ # You only need to overwrite these methods if you want to tweak who can see
+ # which references. For example, the IssueParser class defines its own
+ # `nodes_visible_to_user` method so it can ensure users can only see issues
+ # they have access to.
+ class BaseParser
+ class << self
+ attr_accessor :reference_type
+ end
+
+ # Returns the attribute name containing the value for every object to be
+ # parsed by the current parser.
+ #
+ # For example, for a parser class that returns "Animal" objects this
+ # attribute would be "data-animal".
+ def self.data_attribute
+ @data_attribute ||= "data-#{reference_type.to_s.dasherize}"
+ end
+
+ def initialize(project = nil, current_user = nil)
+ @project = project
+ @current_user = current_user
+ end
+
+ # Returns all the nodes containing references that the user can refer to.
+ def nodes_user_can_reference(user, nodes)
+ nodes
+ end
+
+ # Returns all the nodes that are visible to the given user.
+ def nodes_visible_to_user(user, nodes)
+ projects = lazy { projects_for_nodes(nodes) }
+ project_attr = 'data-project'
+
+ nodes.select do |node|
+ if node.has_attribute?(project_attr)
+ node_id = node.attr(project_attr).to_i
+
+ if project && project.id == node_id
+ true
+ else
+ can?(user, :read_project, projects[node_id])
+ end
+ else
+ true
+ end
+ end
+ end
+
+ # Returns an Array of objects referenced by any of the given HTML nodes.
+ def referenced_by(nodes)
+ ids = unique_attribute_values(nodes, self.class.data_attribute)
+
+ references_relation.where(id: ids)
+ end
+
+ # Returns the ActiveRecord::Relation to use for querying references in the
+ # DB.
+ def references_relation
+ raise NotImplementedError,
+ "#{self.class} does not implement #{__method__}"
+ end
+
+ # Returns a Hash containing attribute values per project ID.
+ #
+ # The returned Hash uses the following format:
+ #
+ # { project id => [value1, value2, ...] }
+ #
+ # nodes - An Array of HTML nodes to process.
+ # attribute - The name of the attribute (as a String) for which to gather
+ # values.
+ #
+ # Returns a Hash.
+ def gather_attributes_per_project(nodes, attribute)
+ per_project = Hash.new { |hash, key| hash[key] = Set.new }
+
+ nodes.each do |node|
+ project_id = node.attr('data-project').to_i
+ id = node.attr(attribute)
+
+ per_project[project_id] << id if id
+ end
+
+ per_project
+ end
+
+ # Returns a Hash containing objects for an attribute grouped per their
+ # IDs.
+ #
+ # The returned Hash uses the following format:
+ #
+ # { id value => row }
+ #
+ # nodes - An Array of HTML nodes to process.
+ #
+ # collection - The model or ActiveRecord relation to use for retrieving
+ # rows from the database.
+ #
+ # attribute - The name of the attribute containing the primary key values
+ # for every row.
+ #
+ # Returns a Hash.
+ def grouped_objects_for_nodes(nodes, collection, attribute)
+ return {} if nodes.empty?
+
+ ids = unique_attribute_values(nodes, attribute)
+
+ collection.where(id: ids).each_with_object({}) do |row, hash|
+ hash[row.id] = row
+ end
+ end
+
+ # Returns an Array containing all unique values of an attribute of the
+ # given nodes.
+ def unique_attribute_values(nodes, attribute)
+ values = Set.new
+
+ nodes.each do |node|
+ if node.has_attribute?(attribute)
+ values << node.attr(attribute)
+ end
+ end
+
+ values.to_a
+ end
+
+ # Processes the list of HTML documents and returns an Array containing all
+ # the references.
+ def process(documents)
+ type = self.class.reference_type
+
+ nodes = documents.flat_map do |document|
+ Querying.css(document, "a[data-reference-type='#{type}'].gfm").to_a
+ end
+
+ gather_references(nodes)
+ end
+
+ # Gathers the references for the given HTML nodes.
+ def gather_references(nodes)
+ nodes = nodes_user_can_reference(current_user, nodes)
+ nodes = nodes_visible_to_user(current_user, nodes)
+
+ referenced_by(nodes)
+ end
+
+ # Returns a Hash containing the projects for a given list of HTML nodes.
+ #
+ # The returned Hash uses the following format:
+ #
+ # { project ID => project }
+ #
+ def projects_for_nodes(nodes)
+ @projects_for_nodes ||=
+ grouped_objects_for_nodes(nodes, Project, 'data-project')
+ end
+
+ def can?(user, permission, subject)
+ Ability.abilities.allowed?(user, permission, subject)
+ end
+
+ def find_projects_for_hash_keys(hash)
+ Project.where(id: hash.keys)
+ end
+
+ private
+
+ attr_reader :current_user, :project
+
+ def lazy(&block)
+ Gitlab::Lazy.new(&block)
+ end
+ end
+ end
+end
diff --git a/lib/banzai/reference_parser/commit_parser.rb b/lib/banzai/reference_parser/commit_parser.rb
new file mode 100644
index 00000000000..0fee9d267de
--- /dev/null
+++ b/lib/banzai/reference_parser/commit_parser.rb
@@ -0,0 +1,34 @@
+module Banzai
+ module ReferenceParser
+ class CommitParser < BaseParser
+ self.reference_type = :commit
+
+ def referenced_by(nodes)
+ commit_ids = commit_ids_per_project(nodes)
+ projects = find_projects_for_hash_keys(commit_ids)
+
+ projects.flat_map do |project|
+ find_commits(project, commit_ids[project.id])
+ end
+ end
+
+ def commit_ids_per_project(nodes)
+ gather_attributes_per_project(nodes, self.class.data_attribute)
+ end
+
+ def find_commits(project, ids)
+ commits = []
+
+ return commits unless project.valid_repo?
+
+ ids.each do |id|
+ commit = project.commit(id)
+
+ commits << commit if commit
+ end
+
+ commits
+ end
+ end
+ end
+end
diff --git a/lib/banzai/reference_parser/commit_range_parser.rb b/lib/banzai/reference_parser/commit_range_parser.rb
new file mode 100644
index 00000000000..69d01f8db15
--- /dev/null
+++ b/lib/banzai/reference_parser/commit_range_parser.rb
@@ -0,0 +1,38 @@
+module Banzai
+ module ReferenceParser
+ class CommitRangeParser < BaseParser
+ self.reference_type = :commit_range
+
+ def referenced_by(nodes)
+ range_ids = commit_range_ids_per_project(nodes)
+ projects = find_projects_for_hash_keys(range_ids)
+
+ projects.flat_map do |project|
+ find_ranges(project, range_ids[project.id])
+ end
+ end
+
+ def commit_range_ids_per_project(nodes)
+ gather_attributes_per_project(nodes, self.class.data_attribute)
+ end
+
+ def find_ranges(project, range_ids)
+ ranges = []
+
+ range_ids.each do |id|
+ range = find_object(project, id)
+
+ ranges << range if range
+ end
+
+ ranges
+ end
+
+ def find_object(project, id)
+ range = CommitRange.new(id, project)
+
+ range.valid_commits? ? range : nil
+ end
+ end
+ end
+end
diff --git a/lib/banzai/reference_parser/external_issue_parser.rb b/lib/banzai/reference_parser/external_issue_parser.rb
new file mode 100644
index 00000000000..a1264db2111
--- /dev/null
+++ b/lib/banzai/reference_parser/external_issue_parser.rb
@@ -0,0 +1,25 @@
+module Banzai
+ module ReferenceParser
+ class ExternalIssueParser < BaseParser
+ self.reference_type = :external_issue
+
+ def referenced_by(nodes)
+ issue_ids = issue_ids_per_project(nodes)
+ projects = find_projects_for_hash_keys(issue_ids)
+ issues = []
+
+ projects.each do |project|
+ issue_ids[project.id].each do |id|
+ issues << ExternalIssue.new(id, project)
+ end
+ end
+
+ issues
+ end
+
+ def issue_ids_per_project(nodes)
+ gather_attributes_per_project(nodes, self.class.data_attribute)
+ end
+ end
+ end
+end
diff --git a/lib/banzai/reference_parser/issue_parser.rb b/lib/banzai/reference_parser/issue_parser.rb
new file mode 100644
index 00000000000..24076e3d9ec
--- /dev/null
+++ b/lib/banzai/reference_parser/issue_parser.rb
@@ -0,0 +1,40 @@
+module Banzai
+ module ReferenceParser
+ class IssueParser < BaseParser
+ self.reference_type = :issue
+
+ def nodes_visible_to_user(user, nodes)
+ # It is not possible to check access rights for external issue trackers
+ return nodes if project && project.external_issue_tracker
+
+ issues = issues_for_nodes(nodes)
+
+ nodes.select do |node|
+ issue = issue_for_node(issues, node)
+
+ issue ? can?(user, :read_issue, issue) : false
+ end
+ end
+
+ def referenced_by(nodes)
+ issues = issues_for_nodes(nodes)
+
+ nodes.map { |node| issue_for_node(issues, node) }.uniq
+ end
+
+ def issues_for_nodes(nodes)
+ @issues_for_nodes ||= grouped_objects_for_nodes(
+ nodes,
+ Issue.all.includes(:author, :assignee, :project),
+ self.class.data_attribute
+ )
+ end
+
+ private
+
+ def issue_for_node(issues, node)
+ issues[node.attr(self.class.data_attribute).to_i]
+ end
+ end
+ end
+end
diff --git a/lib/banzai/reference_parser/label_parser.rb b/lib/banzai/reference_parser/label_parser.rb
new file mode 100644
index 00000000000..e5d1eb11d7f
--- /dev/null
+++ b/lib/banzai/reference_parser/label_parser.rb
@@ -0,0 +1,11 @@
+module Banzai
+ module ReferenceParser
+ class LabelParser < BaseParser
+ self.reference_type = :label
+
+ def references_relation
+ Label
+ end
+ end
+ end
+end
diff --git a/lib/banzai/reference_parser/merge_request_parser.rb b/lib/banzai/reference_parser/merge_request_parser.rb
new file mode 100644
index 00000000000..c9a9ca79c09
--- /dev/null
+++ b/lib/banzai/reference_parser/merge_request_parser.rb
@@ -0,0 +1,11 @@
+module Banzai
+ module ReferenceParser
+ class MergeRequestParser < BaseParser
+ self.reference_type = :merge_request
+
+ def references_relation
+ MergeRequest.includes(:author, :assignee, :target_project)
+ end
+ end
+ end
+end
diff --git a/lib/banzai/reference_parser/milestone_parser.rb b/lib/banzai/reference_parser/milestone_parser.rb
new file mode 100644
index 00000000000..a000ac61e5c
--- /dev/null
+++ b/lib/banzai/reference_parser/milestone_parser.rb
@@ -0,0 +1,11 @@
+module Banzai
+ module ReferenceParser
+ class MilestoneParser < BaseParser
+ self.reference_type = :milestone
+
+ def references_relation
+ Milestone
+ end
+ end
+ end
+end
diff --git a/lib/banzai/reference_parser/snippet_parser.rb b/lib/banzai/reference_parser/snippet_parser.rb
new file mode 100644
index 00000000000..fa71b3c952a
--- /dev/null
+++ b/lib/banzai/reference_parser/snippet_parser.rb
@@ -0,0 +1,11 @@
+module Banzai
+ module ReferenceParser
+ class SnippetParser < BaseParser
+ self.reference_type = :snippet
+
+ def references_relation
+ Snippet
+ end
+ end
+ end
+end
diff --git a/lib/banzai/reference_parser/user_parser.rb b/lib/banzai/reference_parser/user_parser.rb
new file mode 100644
index 00000000000..a12b0d19560
--- /dev/null
+++ b/lib/banzai/reference_parser/user_parser.rb
@@ -0,0 +1,92 @@
+module Banzai
+ module ReferenceParser
+ class UserParser < BaseParser
+ self.reference_type = :user
+
+ def referenced_by(nodes)
+ group_ids = []
+ user_ids = []
+ project_ids = []
+
+ nodes.each do |node|
+ if node.has_attribute?('data-group')
+ group_ids << node.attr('data-group').to_i
+ elsif node.has_attribute?(self.class.data_attribute)
+ user_ids << node.attr(self.class.data_attribute).to_i
+ elsif node.has_attribute?('data-project')
+ project_ids << node.attr('data-project').to_i
+ end
+ end
+
+ find_users_for_groups(group_ids) | find_users(user_ids) |
+ find_users_for_projects(project_ids)
+ end
+
+ def nodes_visible_to_user(user, nodes)
+ group_attr = 'data-group'
+ groups = lazy { grouped_objects_for_nodes(nodes, Group, group_attr) }
+ visible = []
+ remaining = []
+
+ nodes.each do |node|
+ if node.has_attribute?(group_attr)
+ node_group = groups[node.attr(group_attr).to_i]
+
+ if node_group &&
+ can?(user, :read_group, node_group)
+ visible << node
+ end
+ # Remaining nodes will be processed by the parent class'
+ # implementation of this method.
+ else
+ remaining << node
+ end
+ end
+
+ visible + super(current_user, remaining)
+ end
+
+ def nodes_user_can_reference(current_user, nodes)
+ project_attr = 'data-project'
+ author_attr = 'data-author'
+
+ projects = lazy { projects_for_nodes(nodes) }
+ users = lazy { grouped_objects_for_nodes(nodes, User, author_attr) }
+
+ nodes.select do |node|
+ project_id = node.attr(project_attr)
+ user_id = node.attr(author_attr)
+
+ if project && project_id && project.id == project_id.to_i
+ true
+ elsif project_id && user_id
+ project = projects[project_id.to_i]
+ user = users[user_id.to_i]
+
+ project && user ? project.team.member?(user) : false
+ else
+ true
+ end
+ end
+ end
+
+ def find_users(ids)
+ return [] if ids.empty?
+
+ User.where(id: ids).to_a
+ end
+
+ def find_users_for_groups(ids)
+ return [] if ids.empty?
+
+ User.joins(:group_members).where(members: { source_id: ids }).to_a
+ end
+
+ def find_users_for_projects(ids)
+ return [] if ids.empty?
+
+ Project.where(id: ids).flat_map { |p| p.team.members.to_a }
+ end
+ end
+ end
+end