summary | refs | log | tree | commit | diff
path: root/lib/gitlab/email/html_to_markdown_parser.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/email/html_to_markdown_parser.rb')
-rw-r--r-- lib/gitlab/email/html_to_markdown_parser.rb 35
1 files changed, 28 insertions, 7 deletions
diff --git a/lib/gitlab/email/html_to_markdown_parser.rb b/lib/gitlab/email/html_to_markdown_parser.rb
index 42dd012308b..5dd3725cc3e 100644
--- a/lib/gitlab/email/html_to_markdown_parser.rb
+++ b/lib/gitlab/email/html_to_markdown_parser.rb
@@ -5,25 +5,46 @@ require 'nokogiri'
module Gitlab
module Email
class HtmlToMarkdownParser < Html2Text
- ADDITIONAL_TAGS = %w[em strong img details].freeze
- IMG_ATTRS = %w[alt src].freeze
+ extend Gitlab::Utils::Override
+ # List of HTML tags to be converted to Markdown.
+ #
+ # All attributes are removed from a tag except those listed for it below.
+ #
+ # <tag> => [<attribute to keep>, ...]
+ ALLOWED_TAG_ATTRIBUTES = {
+ 'em' => [],
+ 'strong' => [],
+ 'details' => [],
+ 'img' => %w[alt src]
+ }.freeze
+ private_constant :ALLOWED_TAG_ATTRIBUTES
+
+ # This redefinition can be removed once https://github.com/soundasleep/html2text_ruby/pull/30
+ # is merged and released.
def self.convert(html)
html = fix_newlines(replace_entities(html))
doc = Nokogiri::HTML(html)
- HtmlToMarkdownParser.new(doc).convert
+ new(doc).convert
end
+ private
+
+ override :iterate_over
def iterate_over(node)
- return super unless ADDITIONAL_TAGS.include?(node.name)
+ allowed_attributes = ALLOWED_TAG_ATTRIBUTES[node.name]
+ return super unless allowed_attributes
- if node.name == 'img'
- node.keys.each { |key| node.remove_attribute(key) unless IMG_ATTRS.include?(key) } # rubocop:disable Style/HashEachMethods
- end
+ remove_attributes(node, allowed_attributes)
Kramdown::Document.new(node.to_html, input: 'html').to_commonmark
end
+
+ def remove_attributes(node, allowed_attributes)
+ to_remove = (node.keys - allowed_attributes)
+ to_remove.each { |key| node.remove_attribute(key) }
+ end
end
end
end