diff options
author | Yorick Peterse <yorickpeterse@gmail.com> | 2016-05-26 13:16:43 +0200 |
---|---|---|
committer | Yorick Peterse <yorickpeterse@gmail.com> | 2016-05-26 17:14:00 +0200 |
commit | 86166d28029d5fcc729f7b7f5a41635c2e783a9e (patch) | |
tree | d4e9354a4daafc7b298bc7a73980166e41d55bf7 /lib/banzai/reference_parser | |
parent | 94d5416db6415b06706204fb4a4df0100bcab7be (diff) | |
download | gitlab-ce-86166d28029d5fcc729f7b7f5a41635c2e783a9e.tar.gz |
Split Markdown rendering & reference gathering
This splits the Markdown rendering and reference extraction phases into
two distinct code bases. The reference extraction phase no longer relies
on the html-pipeline Gem (and any related code) and allows for
extracting of references from multiple HTML nodes in a single pass. This
means that if you want to extract user references from 200 comments you
no longer need to run 200 times N number of queries, instead only a
handful of queries may be needed.
Diffstat (limited to 'lib/banzai/reference_parser')
-rw-r--r-- | lib/banzai/reference_parser/base_parser.rb | 204 | ||||
-rw-r--r-- | lib/banzai/reference_parser/commit_parser.rb | 34 | ||||
-rw-r--r-- | lib/banzai/reference_parser/commit_range_parser.rb | 38 | ||||
-rw-r--r-- | lib/banzai/reference_parser/external_issue_parser.rb | 25 | ||||
-rw-r--r-- | lib/banzai/reference_parser/issue_parser.rb | 40 | ||||
-rw-r--r-- | lib/banzai/reference_parser/label_parser.rb | 11 | ||||
-rw-r--r-- | lib/banzai/reference_parser/merge_request_parser.rb | 11 | ||||
-rw-r--r-- | lib/banzai/reference_parser/milestone_parser.rb | 11 | ||||
-rw-r--r-- | lib/banzai/reference_parser/snippet_parser.rb | 11 | ||||
-rw-r--r-- | lib/banzai/reference_parser/user_parser.rb | 92 |
10 files changed, 477 insertions, 0 deletions
diff --git a/lib/banzai/reference_parser/base_parser.rb b/lib/banzai/reference_parser/base_parser.rb new file mode 100644 index 00000000000..3d7b9c4a024 --- /dev/null +++ b/lib/banzai/reference_parser/base_parser.rb @@ -0,0 +1,204 @@ +module Banzai + module ReferenceParser + # Base class for reference parsing classes. + # + # Each parser should also specify its reference type by calling + # `self.reference_type = ...` in the body of the class. The value of this + # method should be a symbol such as `:issue` or `:merge_request`. For + # example: + # + # class IssueParser < BaseParser + # self.reference_type = :issue + # end + # + # The reference type is used to determine what nodes to pass to the + # `referenced_by` method. + # + # Parser classes should either implement the instance method + # `references_relation` or overwrite `referenced_by`. The + # `references_relation` method is supposed to return an + # ActiveRecord::Relation used as a base relation for retrieving the objects + # referenced in a set of HTML nodes. + # + # Each class can implement two additional methods: + # + # * `nodes_user_can_reference`: returns an Array of nodes the given user can + # refer to. + # * `nodes_visible_to_user`: returns an Array of nodes that are visible to + # the given user. + # + # You only need to overwrite these methods if you want to tweak who can see + # which references. For example, the IssueParser class defines its own + # `nodes_visible_to_user` method so it can ensure users can only see issues + # they have access to. + class BaseParser + class << self + attr_accessor :reference_type + end + + # Returns the attribute name containing the value for every object to be + # parsed by the current parser. + # + # For example, for a parser class that returns "Animal" objects this + # attribute would be "data-animal". + def self.data_attribute + @data_attribute ||= "data-#{reference_type.to_s.dasherize}" + end + + def initialize(project = nil, current_user = nil) + @project = project + @current_user = current_user + end + + # Returns all the nodes containing references that the user can refer to. + def nodes_user_can_reference(user, nodes) + nodes + end + + # Returns all the nodes that are visible to the given user. + def nodes_visible_to_user(user, nodes) + projects = lazy { projects_for_nodes(nodes) } + project_attr = 'data-project' + + nodes.select do |node| + if node.has_attribute?(project_attr) + node_id = node.attr(project_attr).to_i + + if project && project.id == node_id + true + else + can?(user, :read_project, projects[node_id]) + end + else + true + end + end + end + + # Returns an Array of objects referenced by any of the given HTML nodes. + def referenced_by(nodes) + ids = unique_attribute_values(nodes, self.class.data_attribute) + + references_relation.where(id: ids) + end + + # Returns the ActiveRecord::Relation to use for querying references in the + # DB. + def references_relation + raise NotImplementedError, + "#{self.class} does not implement #{__method__}" + end + + # Returns a Hash containing attribute values per project ID. + # + # The returned Hash uses the following format: + # + # { project id => [value1, value2, ...] } + # + # nodes - An Array of HTML nodes to process. + # attribute - The name of the attribute (as a String) for which to gather + # values. + # + # Returns a Hash. + def gather_attributes_per_project(nodes, attribute) + per_project = Hash.new { |hash, key| hash[key] = Set.new } + + nodes.each do |node| + project_id = node.attr('data-project').to_i + id = node.attr(attribute) + + per_project[project_id] << id if id + end + + per_project + end + + # Returns a Hash containing objects for an attribute grouped per their + # IDs. + # + # The returned Hash uses the following format: + # + # { id value => row } + # + # nodes - An Array of HTML nodes to process. + # + # collection - The model or ActiveRecord relation to use for retrieving + # rows from the database. + # + # attribute - The name of the attribute containing the primary key values + # for every row. + # + # Returns a Hash. + def grouped_objects_for_nodes(nodes, collection, attribute) + return {} if nodes.empty? + + ids = unique_attribute_values(nodes, attribute) + + collection.where(id: ids).each_with_object({}) do |row, hash| + hash[row.id] = row + end + end + + # Returns an Array containing all unique values of an attribute of the + # given nodes. + def unique_attribute_values(nodes, attribute) + values = Set.new + + nodes.each do |node| + if node.has_attribute?(attribute) + values << node.attr(attribute) + end + end + + values.to_a + end + + # Processes the list of HTML documents and returns an Array containing all + # the references. + def process(documents) + type = self.class.reference_type + + nodes = documents.flat_map do |document| + Querying.css(document, "a[data-reference-type='#{type}'].gfm").to_a + end + + gather_references(nodes) + end + + # Gathers the references for the given HTML nodes. + def gather_references(nodes) + nodes = nodes_user_can_reference(current_user, nodes) + nodes = nodes_visible_to_user(current_user, nodes) + + referenced_by(nodes) + end + + # Returns a Hash containing the projects for a given list of HTML nodes. + # + # The returned Hash uses the following format: + # + # { project ID => project } + # + def projects_for_nodes(nodes) + @projects_for_nodes ||= + grouped_objects_for_nodes(nodes, Project, 'data-project') + end + + def can?(user, permission, subject) + Ability.abilities.allowed?(user, permission, subject) + end + + def find_projects_for_hash_keys(hash) + Project.where(id: hash.keys) + end + + private + + attr_reader :current_user, :project + + def lazy(&block) + Gitlab::Lazy.new(&block) + end + end + end +end diff --git a/lib/banzai/reference_parser/commit_parser.rb b/lib/banzai/reference_parser/commit_parser.rb new file mode 100644 index 00000000000..0fee9d267de --- /dev/null +++ b/lib/banzai/reference_parser/commit_parser.rb @@ -0,0 +1,34 @@ +module Banzai + module ReferenceParser + class CommitParser < BaseParser + self.reference_type = :commit + + def referenced_by(nodes) + commit_ids = commit_ids_per_project(nodes) + projects = find_projects_for_hash_keys(commit_ids) + + projects.flat_map do |project| + find_commits(project, commit_ids[project.id]) + end + end + + def commit_ids_per_project(nodes) + gather_attributes_per_project(nodes, self.class.data_attribute) + end + + def find_commits(project, ids) + commits = [] + + return commits unless project.valid_repo? + + ids.each do |id| + commit = project.commit(id) + + commits << commit if commit + end + + commits + end + end + end +end diff --git a/lib/banzai/reference_parser/commit_range_parser.rb b/lib/banzai/reference_parser/commit_range_parser.rb new file mode 100644 index 00000000000..69d01f8db15 --- /dev/null +++ b/lib/banzai/reference_parser/commit_range_parser.rb @@ -0,0 +1,38 @@ +module Banzai + module ReferenceParser + class CommitRangeParser < BaseParser + self.reference_type = :commit_range + + def referenced_by(nodes) + range_ids = commit_range_ids_per_project(nodes) + projects = find_projects_for_hash_keys(range_ids) + + projects.flat_map do |project| + find_ranges(project, range_ids[project.id]) + end + end + + def commit_range_ids_per_project(nodes) + gather_attributes_per_project(nodes, self.class.data_attribute) + end + + def find_ranges(project, range_ids) + ranges = [] + + range_ids.each do |id| + range = find_object(project, id) + + ranges << range if range + end + + ranges + end + + def find_object(project, id) + range = CommitRange.new(id, project) + + range.valid_commits? ? range : nil + end + end + end +end diff --git a/lib/banzai/reference_parser/external_issue_parser.rb b/lib/banzai/reference_parser/external_issue_parser.rb new file mode 100644 index 00000000000..a1264db2111 --- /dev/null +++ b/lib/banzai/reference_parser/external_issue_parser.rb @@ -0,0 +1,25 @@ +module Banzai + module ReferenceParser + class ExternalIssueParser < BaseParser + self.reference_type = :external_issue + + def referenced_by(nodes) + issue_ids = issue_ids_per_project(nodes) + projects = find_projects_for_hash_keys(issue_ids) + issues = [] + + projects.each do |project| + issue_ids[project.id].each do |id| + issues << ExternalIssue.new(id, project) + end + end + + issues + end + + def issue_ids_per_project(nodes) + gather_attributes_per_project(nodes, self.class.data_attribute) + end + end + end +end diff --git a/lib/banzai/reference_parser/issue_parser.rb b/lib/banzai/reference_parser/issue_parser.rb new file mode 100644 index 00000000000..24076e3d9ec --- /dev/null +++ b/lib/banzai/reference_parser/issue_parser.rb @@ -0,0 +1,40 @@ +module Banzai + module ReferenceParser + class IssueParser < BaseParser + self.reference_type = :issue + + def nodes_visible_to_user(user, nodes) + # It is not possible to check access rights for external issue trackers + return nodes if project && project.external_issue_tracker + + issues = issues_for_nodes(nodes) + + nodes.select do |node| + issue = issue_for_node(issues, node) + + issue ? can?(user, :read_issue, issue) : false + end + end + + def referenced_by(nodes) + issues = issues_for_nodes(nodes) + + nodes.map { |node| issue_for_node(issues, node) }.uniq + end + + def issues_for_nodes(nodes) + @issues_for_nodes ||= grouped_objects_for_nodes( + nodes, + Issue.all.includes(:author, :assignee, :project), + self.class.data_attribute + ) + end + + private + + def issue_for_node(issues, node) + issues[node.attr(self.class.data_attribute).to_i] + end + end + end +end diff --git a/lib/banzai/reference_parser/label_parser.rb b/lib/banzai/reference_parser/label_parser.rb new file mode 100644 index 00000000000..e5d1eb11d7f --- /dev/null +++ b/lib/banzai/reference_parser/label_parser.rb @@ -0,0 +1,11 @@ +module Banzai + module ReferenceParser + class LabelParser < BaseParser + self.reference_type = :label + + def references_relation + Label + end + end + end +end diff --git a/lib/banzai/reference_parser/merge_request_parser.rb b/lib/banzai/reference_parser/merge_request_parser.rb new file mode 100644 index 00000000000..c9a9ca79c09 --- /dev/null +++ b/lib/banzai/reference_parser/merge_request_parser.rb @@ -0,0 +1,11 @@ +module Banzai + module ReferenceParser + class MergeRequestParser < BaseParser + self.reference_type = :merge_request + + def references_relation + MergeRequest.includes(:author, :assignee, :target_project) + end + end + end +end diff --git a/lib/banzai/reference_parser/milestone_parser.rb b/lib/banzai/reference_parser/milestone_parser.rb new file mode 100644 index 00000000000..a000ac61e5c --- /dev/null +++ b/lib/banzai/reference_parser/milestone_parser.rb @@ -0,0 +1,11 @@ +module Banzai + module ReferenceParser + class MilestoneParser < BaseParser + self.reference_type = :milestone + + def references_relation + Milestone + end + end + end +end diff --git a/lib/banzai/reference_parser/snippet_parser.rb b/lib/banzai/reference_parser/snippet_parser.rb new file mode 100644 index 00000000000..fa71b3c952a --- /dev/null +++ b/lib/banzai/reference_parser/snippet_parser.rb @@ -0,0 +1,11 @@ +module Banzai + module ReferenceParser + class SnippetParser < BaseParser + self.reference_type = :snippet + + def references_relation + Snippet + end + end + end +end diff --git a/lib/banzai/reference_parser/user_parser.rb b/lib/banzai/reference_parser/user_parser.rb new file mode 100644 index 00000000000..a12b0d19560 --- /dev/null +++ b/lib/banzai/reference_parser/user_parser.rb @@ -0,0 +1,92 @@ +module Banzai + module ReferenceParser + class UserParser < BaseParser + self.reference_type = :user + + def referenced_by(nodes) + group_ids = [] + user_ids = [] + project_ids = [] + + nodes.each do |node| + if node.has_attribute?('data-group') + group_ids << node.attr('data-group').to_i + elsif node.has_attribute?(self.class.data_attribute) + user_ids << node.attr(self.class.data_attribute).to_i + elsif node.has_attribute?('data-project') + project_ids << node.attr('data-project').to_i + end + end + + find_users_for_groups(group_ids) | find_users(user_ids) | + find_users_for_projects(project_ids) + end + + def nodes_visible_to_user(user, nodes) + group_attr = 'data-group' + groups = lazy { grouped_objects_for_nodes(nodes, Group, group_attr) } + visible = [] + remaining = [] + + nodes.each do |node| + if node.has_attribute?(group_attr) + node_group = groups[node.attr(group_attr).to_i] + + if node_group && + can?(user, :read_group, node_group) + visible << node + end + # Remaining nodes will be processed by the parent class' + # implementation of this method. + else + remaining << node + end + end + + visible + super(current_user, remaining) + end + + def nodes_user_can_reference(current_user, nodes) + project_attr = 'data-project' + author_attr = 'data-author' + + projects = lazy { projects_for_nodes(nodes) } + users = lazy { grouped_objects_for_nodes(nodes, User, author_attr) } + + nodes.select do |node| + project_id = node.attr(project_attr) + user_id = node.attr(author_attr) + + if project && project_id && project.id == project_id.to_i + true + elsif project_id && user_id + project = projects[project_id.to_i] + user = users[user_id.to_i] + + project && user ? project.team.member?(user) : false + else + true + end + end + end + + def find_users(ids) + return [] if ids.empty? + + User.where(id: ids).to_a + end + + def find_users_for_groups(ids) + return [] if ids.empty? + + User.joins(:group_members).where(members: { source_id: ids }).to_a + end + + def find_users_for_projects(ids) + return [] if ids.empty? + + Project.where(id: ids).flat_map { |p| p.team.members.to_a } + end + end + end +end |