diff options
Diffstat (limited to 'lib/banzai')
-rw-r--r-- | lib/banzai/filter/emoji_filter.rb | 5 | ||||
-rw-r--r-- | lib/banzai/filter/external_link_filter.rb | 36 | ||||
-rw-r--r-- | lib/banzai/filter/issuable_state_filter.rb | 37 | ||||
-rw-r--r-- | lib/banzai/filter/markdown_filter.rb | 2 | ||||
-rw-r--r-- | lib/banzai/filter/plantuml_filter.rb | 8 | ||||
-rw-r--r-- | lib/banzai/filter/redactor_filter.rb | 2 | ||||
-rw-r--r-- | lib/banzai/filter/sanitization_filter.rb | 22 | ||||
-rw-r--r-- | lib/banzai/filter/syntax_highlight_filter.rb | 2 | ||||
-rw-r--r-- | lib/banzai/issuable_extractor.rb | 40 | ||||
-rw-r--r-- | lib/banzai/object_renderer.rb | 46 | ||||
-rw-r--r-- | lib/banzai/pipeline/gfm_pipeline.rb | 2 | ||||
-rw-r--r-- | lib/banzai/pipeline/markup_pipeline.rb | 13 | ||||
-rw-r--r-- | lib/banzai/pipeline/post_process_pipeline.rb | 1 | ||||
-rw-r--r-- | lib/banzai/reference_parser/base_parser.rb | 21 | ||||
-rw-r--r-- | lib/banzai/reference_parser/issue_parser.rb | 12 | ||||
-rw-r--r-- | lib/banzai/reference_parser/merge_request_parser.rb | 38 | ||||
-rw-r--r-- | lib/banzai/reference_parser/user_parser.rb | 6 | ||||
-rw-r--r-- | lib/banzai/renderer.rb | 41 | ||||
-rw-r--r-- | lib/banzai/renderer/html.rb | 13 |
19 files changed, 228 insertions, 119 deletions
diff --git a/lib/banzai/filter/emoji_filter.rb b/lib/banzai/filter/emoji_filter.rb index d6138816e70..6255a611dbe 100644 --- a/lib/banzai/filter/emoji_filter.rb +++ b/lib/banzai/filter/emoji_filter.rb @@ -53,7 +53,10 @@ module Banzai # Build a regexp that matches all valid :emoji: names. def self.emoji_pattern - @emoji_pattern ||= /:(#{Gitlab::Emoji.emojis_names.map { |name| Regexp.escape(name) }.join('|')}):/ + @emoji_pattern ||= + /(?<=[^[:alnum:]:]|\n|^) + :(#{Gitlab::Emoji.emojis_names.map { |name| Regexp.escape(name) }.join('|')}): + (?=[^[:alnum:]:]|$)/x end # Build a regexp that matches all valid unicode emojis names. diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index d67d466bce8..d6327ef31cb 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -2,16 +2,17 @@ module Banzai module Filter # HTML Filter to modify the attributes of external links class ExternalLinkFilter < HTML::Pipeline::Filter + SCHEMES = ['http', 'https', nil].freeze + def call links.each do |node| - href = href_to_lowercase_scheme(node["href"].to_s) + uri = uri(node['href'].to_s) + next unless uri - unless node["href"].to_s == href - node.set_attribute('href', href) - end + node.set_attribute('href', uri.to_s) - if href =~ %r{\A(https?:)?//[^/]} && external_url?(href) - node.set_attribute('rel', 'nofollow noreferrer') + if SCHEMES.include?(uri.scheme) && external_url?(uri) + node.set_attribute('rel', 'nofollow noreferrer noopener') node.set_attribute('target', '_blank') end end @@ -21,27 +22,26 @@ module Banzai private + def uri(href) + URI.parse(href) + rescue URI::Error + nil + end + def links query = 'descendant-or-self::a[@href and not(@href = "")]' doc.xpath(query) end - def href_to_lowercase_scheme(href) - scheme_match = href.match(/\A(\w+):\/\//) - - if scheme_match - scheme_match.to_s.downcase + scheme_match.post_match - else - href - end - end + def external_url?(uri) + # Relative URLs miss a hostname + return false unless uri.hostname - def external_url?(url) - !url.start_with?(internal_url) + uri.hostname != internal_url.hostname end def internal_url - @internal_url ||= Gitlab.config.gitlab.url + @internal_url ||= URI.parse(Gitlab.config.gitlab.url) end end end diff --git a/lib/banzai/filter/issuable_state_filter.rb b/lib/banzai/filter/issuable_state_filter.rb new file mode 100644 index 00000000000..327ea9449a1 --- /dev/null +++ b/lib/banzai/filter/issuable_state_filter.rb @@ -0,0 +1,37 @@ +module Banzai + module Filter + # HTML filter that appends state information to issuable links. + # Runs as a post-process filter as issuable state might change whilst + # Markdown is in the cache. + # + # This filter supports cross-project references. + class IssuableStateFilter < HTML::Pipeline::Filter + VISIBLE_STATES = %w(closed merged).freeze + + def call + return doc unless context[:issuable_state_filter_enabled] + + extractor = Banzai::IssuableExtractor.new(project, current_user) + issuables = extractor.extract([doc]) + + issuables.each do |node, issuable| + if VISIBLE_STATES.include?(issuable.state) && node.inner_html == issuable.reference_link_text(project) + node.content += " (#{issuable.state})" + end + end + + doc + end + + private + + def current_user + context[:current_user] + end + + def project + context[:project] + end + end + end +end diff --git a/lib/banzai/filter/markdown_filter.rb b/lib/banzai/filter/markdown_filter.rb index ff580ec68f8..ee73fa91589 100644 --- a/lib/banzai/filter/markdown_filter.rb +++ b/lib/banzai/filter/markdown_filter.rb @@ -14,7 +14,7 @@ module Banzai def self.renderer @renderer ||= begin - renderer = Redcarpet::Render::HTML.new + renderer = Banzai::Renderer::HTML.new Redcarpet::Markdown.new(renderer, redcarpet_options) end end diff --git a/lib/banzai/filter/plantuml_filter.rb b/lib/banzai/filter/plantuml_filter.rb index b2537117558..5325819d828 100644 --- a/lib/banzai/filter/plantuml_filter.rb +++ b/lib/banzai/filter/plantuml_filter.rb @@ -7,14 +7,14 @@ module Banzai # class PlantumlFilter < HTML::Pipeline::Filter def call - return doc unless doc.at('pre.plantuml') && settings.plantuml_enabled + return doc unless doc.at('pre > code[lang="plantuml"]') && settings.plantuml_enabled plantuml_setup - doc.css('pre.plantuml').each do |el| + doc.css('pre > code[lang="plantuml"]').each do |node| img_tag = Nokogiri::HTML::DocumentFragment.parse( - Asciidoctor::PlantUml::Processor.plantuml_content(el.content, {})) - el.replace img_tag + Asciidoctor::PlantUml::Processor.plantuml_content(node.content, {})) + node.parent.replace(img_tag) end doc diff --git a/lib/banzai/filter/redactor_filter.rb b/lib/banzai/filter/redactor_filter.rb index c59a80dd1c7..9f9882b3b40 100644 --- a/lib/banzai/filter/redactor_filter.rb +++ b/lib/banzai/filter/redactor_filter.rb @@ -7,7 +7,7 @@ module Banzai # class RedactorFilter < HTML::Pipeline::Filter def call - Redactor.new(project, current_user).redact([doc]) + Redactor.new(project, current_user).redact([doc]) unless context[:skip_redaction] doc end diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index d5f9e252f62..522217deae4 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -24,10 +24,6 @@ module Banzai # Only push these customizations once return if customized?(whitelist[:transformers]) - # Allow code highlighting - whitelist[:attributes]['pre'] = %w(class v-pre) - whitelist[:attributes]['span'] = %w(class) - # Allow table alignment whitelist[:attributes]['th'] = %w(style) whitelist[:attributes]['td'] = %w(style) @@ -52,9 +48,6 @@ module Banzai # Remove `rel` attribute from `a` elements whitelist[:transformers].push(self.class.remove_rel) - # Remove `class` attribute from non-highlight spans - whitelist[:transformers].push(self.class.clean_spans) - whitelist end @@ -84,21 +77,6 @@ module Banzai end end end - - def clean_spans - lambda do |env| - node = env[:node] - - return unless node.name == 'span' - return unless node.has_attribute?('class') - - unless node.ancestors.any? { |n| n.name.casecmp('pre').zero? } - node.remove_attribute('class') - end - - { node_whitelist: [node] } - end - end end end end diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb index 9f09ca90697..7da565043d1 100644 --- a/lib/banzai/filter/syntax_highlight_filter.rb +++ b/lib/banzai/filter/syntax_highlight_filter.rb @@ -14,7 +14,7 @@ module Banzai end def highlight_node(node) - language = node.attr('class') + language = node.attr('lang') code = node.text css_classes = "code highlight" lexer = lexer_for(language) diff --git a/lib/banzai/issuable_extractor.rb b/lib/banzai/issuable_extractor.rb new file mode 100644 index 00000000000..cbabf9156de --- /dev/null +++ b/lib/banzai/issuable_extractor.rb @@ -0,0 +1,40 @@ +module Banzai + # Extract references to issuables from multiple documents + + # This populates RequestStore cache used in Banzai::ReferenceParser::IssueParser + # and Banzai::ReferenceParser::MergeRequestParser + # Populating the cache should happen before processing documents one-by-one + # so we can avoid N+1 queries problem + + class IssuableExtractor + QUERY = %q( + descendant-or-self::a[contains(concat(" ", @class, " "), " gfm ")] + [@data-reference-type="issue" or @data-reference-type="merge_request"] + ).freeze + + attr_reader :project, :user + + def initialize(project, user) + @project = project + @user = user + end + + # Returns Hash in the form { node => issuable_instance } + def extract(documents) + nodes = documents.flat_map do |document| + document.xpath(QUERY) + end + + issue_parser = Banzai::ReferenceParser::IssueParser.new(project, user) + merge_request_parser = Banzai::ReferenceParser::MergeRequestParser.new(project, user) + + issuables_for_nodes = issue_parser.issues_for_nodes(nodes).merge( + merge_request_parser.merge_requests_for_nodes(nodes) + ) + + # The project for the issue/MR might be pending for deletion! + # Filter them out because we don't care about them. + issuables_for_nodes.select { |node, issuable| issuable.project } + end + end +end diff --git a/lib/banzai/object_renderer.rb b/lib/banzai/object_renderer.rb index 9f8eb0931b8..002a3341ccd 100644 --- a/lib/banzai/object_renderer.rb +++ b/lib/banzai/object_renderer.rb @@ -31,7 +31,8 @@ module Banzai # # Returns the same input objects. def render(objects, attribute) - documents = render_objects(objects, attribute) + documents = render_documents(objects, attribute) + documents = post_process_documents(documents, objects, attribute) redacted = redact_documents(documents) objects.each_with_index do |object, index| @@ -41,9 +42,24 @@ module Banzai end end - # Renders the attribute of every given object. - def render_objects(objects, attribute) - render_attributes(objects, attribute) + private + + def render_documents(objects, attribute) + pipeline = HTML::Pipeline.new([]) + + objects.map do |object| + pipeline.to_document(Banzai.render_field(object, attribute)) + end + end + + def post_process_documents(documents, objects, attribute) + # Called here to populate cache, refer to IssuableExtractor docs + IssuableExtractor.new(project, user).extract(documents) + + documents.zip(objects).map do |document, object| + context = context_for(object, attribute) + Banzai::Pipeline[:post_process].to_document(document, context) + end end # Redacts the list of documents. @@ -57,25 +73,15 @@ module Banzai # Returns a Banzai context for the given object and attribute. def context_for(object, attribute) - context = base_context.dup - context = context.merge(object.banzai_render_context(attribute)) - context - end - - # Renders the attributes of a set of objects. - # - # Returns an Array of `Nokogiri::HTML::Document`. - def render_attributes(objects, attribute) - objects.map do |object| - string = Banzai.render_field(object, attribute) - context = context_for(object, attribute) - - Banzai::Pipeline[:relative_link].to_document(string, context) - end + base_context.merge(object.banzai_render_context(attribute)) end def base_context - @base_context ||= @redaction_context.merge(current_user: user, project: project) + @base_context ||= @redaction_context.merge( + current_user: user, + project: project, + skip_redaction: true + ) end end end diff --git a/lib/banzai/pipeline/gfm_pipeline.rb b/lib/banzai/pipeline/gfm_pipeline.rb index fd4a6a107c2..bd4d1aa9ff8 100644 --- a/lib/banzai/pipeline/gfm_pipeline.rb +++ b/lib/banzai/pipeline/gfm_pipeline.rb @@ -9,9 +9,9 @@ module Banzai # The GFM-to-HTML-to-GFM cycle is tested in spec/features/copy_as_gfm_spec.rb. def self.filters @filters ||= FilterArray[ - Filter::SyntaxHighlightFilter, Filter::PlantumlFilter, Filter::SanitizationFilter, + Filter::SyntaxHighlightFilter, Filter::MathFilter, Filter::UploadLinkFilter, diff --git a/lib/banzai/pipeline/markup_pipeline.rb b/lib/banzai/pipeline/markup_pipeline.rb new file mode 100644 index 00000000000..c56d908009f --- /dev/null +++ b/lib/banzai/pipeline/markup_pipeline.rb @@ -0,0 +1,13 @@ +module Banzai + module Pipeline + class MarkupPipeline < BasePipeline + def self.filters + @filters ||= FilterArray[ + Filter::SanitizationFilter, + Filter::ExternalLinkFilter, + Filter::PlantumlFilter + ] + end + end + end +end diff --git a/lib/banzai/pipeline/post_process_pipeline.rb b/lib/banzai/pipeline/post_process_pipeline.rb index ecff094b1e5..131ac3b0eec 100644 --- a/lib/banzai/pipeline/post_process_pipeline.rb +++ b/lib/banzai/pipeline/post_process_pipeline.rb @@ -4,6 +4,7 @@ module Banzai def self.filters FilterArray[ Filter::RelativeLinkFilter, + Filter::IssuableStateFilter, Filter::RedactorFilter ] end diff --git a/lib/banzai/reference_parser/base_parser.rb b/lib/banzai/reference_parser/base_parser.rb index 52fdb9a2140..c2503fa2adc 100644 --- a/lib/banzai/reference_parser/base_parser.rb +++ b/lib/banzai/reference_parser/base_parser.rb @@ -62,8 +62,7 @@ module Banzai nodes.select do |node| if node.has_attribute?(project_attr) - node_id = node.attr(project_attr).to_i - can_read_reference?(user, projects[node_id]) + can_read_reference?(user, projects[node]) else true end @@ -112,12 +111,12 @@ module Banzai per_project end - # Returns a Hash containing objects for an attribute grouped per their - # IDs. + # Returns a Hash containing objects for an attribute grouped per the + # nodes that reference them. # # The returned Hash uses the following format: # - # { id value => row } + # { node => row } # # nodes - An Array of HTML nodes to process. # @@ -132,9 +131,15 @@ module Banzai return {} if nodes.empty? ids = unique_attribute_values(nodes, attribute) - rows = collection_objects_for_ids(collection, ids) + collection_objects = collection_objects_for_ids(collection, ids) + objects_by_id = collection_objects.index_by(&:id) - rows.index_by(&:id) + nodes.each_with_object({}) do |node, hash| + if node.has_attribute?(attribute) + obj = objects_by_id[node.attr(attribute).to_i] + hash[node] = obj if obj + end + end end # Returns an Array containing all unique values of an attribute of the @@ -201,7 +206,7 @@ module Banzai # # The returned Hash uses the following format: # - # { project ID => project } + # { node => project } # def projects_for_nodes(nodes) @projects_for_nodes ||= diff --git a/lib/banzai/reference_parser/issue_parser.rb b/lib/banzai/reference_parser/issue_parser.rb index 6c20dec5734..89ec715ddf6 100644 --- a/lib/banzai/reference_parser/issue_parser.rb +++ b/lib/banzai/reference_parser/issue_parser.rb @@ -13,14 +13,14 @@ module Banzai issues_readable_by_user(issues.values, user).to_set nodes.select do |node| - readable_issues.include?(issue_for_node(issues, node)) + readable_issues.include?(issues[node]) end end def referenced_by(nodes) issues = issues_for_nodes(nodes) - nodes.map { |node| issue_for_node(issues, node) }.uniq + nodes.map { |node| issues[node] }.compact.uniq end def issues_for_nodes(nodes) @@ -28,7 +28,7 @@ module Banzai nodes, Issue.all.includes( :author, - :assignee, + :assignees, { # These associations are primarily used for checking permissions. # Eager loading these ensures we don't end up running dozens of @@ -44,12 +44,6 @@ module Banzai self.class.data_attribute ) end - - private - - def issue_for_node(issues, node) - issues[node.attr(self.class.data_attribute).to_i] - end end end end diff --git a/lib/banzai/reference_parser/merge_request_parser.rb b/lib/banzai/reference_parser/merge_request_parser.rb index 40451947e6c..8b0662749fd 100644 --- a/lib/banzai/reference_parser/merge_request_parser.rb +++ b/lib/banzai/reference_parser/merge_request_parser.rb @@ -3,14 +3,42 @@ module Banzai class MergeRequestParser < BaseParser self.reference_type = :merge_request - def references_relation - MergeRequest.includes(:author, :assignee, :target_project) + def nodes_visible_to_user(user, nodes) + merge_requests = merge_requests_for_nodes(nodes) + + nodes.select do |node| + merge_request = merge_requests[node] + + merge_request && can?(user, :read_merge_request, merge_request.project) + end end - private + def referenced_by(nodes) + merge_requests = merge_requests_for_nodes(nodes) + + nodes.map { |node| merge_requests[node] }.compact.uniq + end - def can_read_reference?(user, ref_project) - can?(user, :read_merge_request, ref_project) + def merge_requests_for_nodes(nodes) + @merge_requests_for_nodes ||= grouped_objects_for_nodes( + nodes, + MergeRequest.includes( + :author, + :assignee, + { + # These associations are primarily used for checking permissions. + # Eager loading these ensures we don't end up running dozens of + # queries in this process. + target_project: [ + { namespace: :owner }, + { group: [:owners, :group_members] }, + :invited_groups, + :project_members, + :project_feature + ] + }), + self.class.data_attribute + ) end end end diff --git a/lib/banzai/reference_parser/user_parser.rb b/lib/banzai/reference_parser/user_parser.rb index 7adaffa19c1..09b66cbd8fb 100644 --- a/lib/banzai/reference_parser/user_parser.rb +++ b/lib/banzai/reference_parser/user_parser.rb @@ -49,7 +49,7 @@ module Banzai # Check if project belongs to a group which # user can read. def can_read_group_reference?(node, user, groups) - node_group = groups[node.attr('data-group').to_i] + node_group = groups[node] node_group && can?(user, :read_group, node_group) end @@ -74,8 +74,8 @@ module Banzai if project && project_id && project.id == project_id.to_i true elsif project_id && user_id - project = projects[project_id.to_i] - user = users[user_id.to_i] + project = projects[node] + user = users[node] project && user ? project.team.member?(user) : false else diff --git a/lib/banzai/renderer.rb b/lib/banzai/renderer.rb index 74663556cbb..c7801cb5baf 100644 --- a/lib/banzai/renderer.rb +++ b/lib/banzai/renderer.rb @@ -1,7 +1,5 @@ module Banzai module Renderer - module_function - # Convert a Markdown String into an HTML-safe String of HTML # # Note that while the returned HTML will have been sanitized of dangerous @@ -16,7 +14,7 @@ module Banzai # context - Hash of context options passed to our HTML Pipeline # # Returns an HTML-safe String - def render(text, context = {}) + def self.render(text, context = {}) cache_key = context.delete(:cache_key) cache_key = full_cache_key(cache_key, context[:pipeline]) @@ -35,24 +33,16 @@ module Banzai # of HTML. This method is analogous to calling render(object.field), but it # can cache the rendered HTML in the object, rather than Redis. # - # The context to use is learned from the passed-in object by calling - # #banzai_render_context(field), and cannot be changed. Use #render, passing - # it the field text, if a custom rendering is needed. The generated context - # is returned along with the HTML. - def render_field(object, field) - html_field = object.markdown_cache_field_for(field) - - html = object.__send__(html_field) - return html if html.present? - - html = cacheless_render_field(object, field) - update_object(object, html_field, html) unless object.new_record? || object.destroyed? + # The context to use is managed by the object and cannot be changed. + # Use #render, passing it the field text, if a custom rendering is needed. + def self.render_field(object, field) + object.refresh_markdown_cache!(do_update: update_object?(object)) unless object.cached_html_up_to_date?(field) - html + object.cached_html_for(field) end # Same as +render_field+, but without consulting or updating the cache field - def cacheless_render_field(object, field, options = {}) + def self.cacheless_render_field(object, field, options = {}) text = object.__send__(field) context = object.banzai_render_context(field).merge(options) @@ -82,7 +72,7 @@ module Banzai # texts_and_contexts # => [{ text: '### Hello', # context: { cache_key: [note, :note] } }] - def cache_collection_render(texts_and_contexts) + def self.cache_collection_render(texts_and_contexts) items_collection = texts_and_contexts.each_with_index do |item, index| context = item[:context] cache_key = full_cache_multi_key(context.delete(:cache_key), context[:pipeline]) @@ -111,7 +101,7 @@ module Banzai items_collection.map { |item| item[:rendered] } end - def render_result(text, context = {}) + def self.render_result(text, context = {}) text = Pipeline[:pre_process].to_html(text, context) if text Pipeline[context[:pipeline]].call(text, context) @@ -130,7 +120,7 @@ module Banzai # :user - User object # # Returns an HTML-safe String - def post_process(html, context) + def self.post_process(html, context) context = Pipeline[context[:pipeline]].transform_context(context) pipeline = Pipeline[:post_process] @@ -141,7 +131,7 @@ module Banzai end.html_safe end - def cacheless_render(text, context = {}) + def self.cacheless_render(text, context = {}) Gitlab::Metrics.measure(:banzai_cacheless_render) do result = render_result(text, context) @@ -154,7 +144,7 @@ module Banzai end end - def full_cache_key(cache_key, pipeline_name) + def self.full_cache_key(cache_key, pipeline_name) return unless cache_key ["banzai", *cache_key, pipeline_name || :full] end @@ -162,13 +152,14 @@ module Banzai # To map Rails.cache.read_multi results we need to know the Rails.cache.expanded_key. # Other option will be to generate stringified keys on our side and don't delegate to Rails.cache.expanded_key # method. - def full_cache_multi_key(cache_key, pipeline_name) + def self.full_cache_multi_key(cache_key, pipeline_name) return unless cache_key Rails.cache.send(:expanded_key, full_cache_key(cache_key, pipeline_name)) end - def update_object(object, html_field, html) - object.update_column(html_field, html) + # GitLab EE needs to disable updates on GET requests in Geo + def self.update_object?(object) + true end end end diff --git a/lib/banzai/renderer/html.rb b/lib/banzai/renderer/html.rb new file mode 100644 index 00000000000..252caa35947 --- /dev/null +++ b/lib/banzai/renderer/html.rb @@ -0,0 +1,13 @@ +module Banzai + module Renderer + class HTML < Redcarpet::Render::HTML + def block_code(code, lang) + lang_attr = lang ? %Q{ lang="#{lang}"} : '' + + "\n<pre>" \ + "<code#{lang_attr}>#{html_escape(code)}</code>" \ + "</pre>" + end + end + end +end |