diff options
Diffstat (limited to 'lib/gitlab/email/html_to_markdown_parser.rb')
-rw-r--r-- | lib/gitlab/email/html_to_markdown_parser.rb | 35 |
1 files changed, 28 insertions, 7 deletions
diff --git a/lib/gitlab/email/html_to_markdown_parser.rb b/lib/gitlab/email/html_to_markdown_parser.rb
index 42dd012308b..5dd3725cc3e 100644
--- a/lib/gitlab/email/html_to_markdown_parser.rb
+++ b/lib/gitlab/email/html_to_markdown_parser.rb
@@ -5,25 +5,46 @@ require 'nokogiri'
 module Gitlab
   module Email
     class HtmlToMarkdownParser < Html2Text
-      ADDITIONAL_TAGS = %w[em strong img details].freeze
-      IMG_ATTRS = %w[alt src].freeze
+      extend Gitlab::Utils::Override
 
+      # List of tags to be converted by Markdown.
+      #
+      # All attributes are removed except for the defined ones.
+      #
+      # <tag> => [<attribute to keep>, ...]
+      ALLOWED_TAG_ATTRIBUTES = {
+        'em' => [],
+        'strong' => [],
+        'details' => [],
+        'img' => %w[alt src]
+      }.freeze
+      private_constant :ALLOWED_TAG_ATTRIBUTES
+
+      # This redefinition can be removed once https://github.com/soundasleep/html2text_ruby/pull/30
+      # is merged and released.
       def self.convert(html)
         html = fix_newlines(replace_entities(html))
         doc = Nokogiri::HTML(html)
 
-        HtmlToMarkdownParser.new(doc).convert
+        new(doc).convert
       end
 
+      private
+
+      override :iterate_over
       def iterate_over(node)
-        return super unless ADDITIONAL_TAGS.include?(node.name)
+        allowed_attributes = ALLOWED_TAG_ATTRIBUTES[node.name]
+        return super unless allowed_attributes
 
-        if node.name == 'img'
-          node.keys.each { |key| node.remove_attribute(key) unless IMG_ATTRS.include?(key) } # rubocop:disable Style/HashEachMethods
-        end
+        remove_attributes(node, allowed_attributes)
 
         Kramdown::Document.new(node.to_html, input: 'html').to_commonmark
       end
+
+      def remove_attributes(node, allowed_attributes)
+        to_remove = (node.keys - allowed_attributes)
+        to_remove.each { |key| node.remove_attribute(key) }
+      end
     end
   end
 end