diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/email/html_parser.rb | 31 | ||||
-rw-r--r-- | lib/gitlab/email/reply_parser.rb | 19 |
2 files changed, 45 insertions, 5 deletions
diff --git a/lib/gitlab/email/html_parser.rb b/lib/gitlab/email/html_parser.rb
new file mode 100644
index 00000000000..51d8edb1b13
--- /dev/null
+++ b/lib/gitlab/email/html_parser.rb
@@ -0,0 +1,31 @@
+module Gitlab
+  module Email
+    class HTMLParser
+      def self.parse_reply(raw_body)
+        new(raw_body).filtered_text
+      end
+
+      attr_reader :raw_body
+      def initialize(raw_body)
+        @raw_body = raw_body
+      end
+
+      def document
+        @document ||= Nokogiri::HTML(raw_body)
+      end
+
+      def filter_replies!
+        document.xpath('//blockquote').each { |n| n.replace('> ') }
+        document.xpath('//table').each { |n| n.remove }
+      end
+
+      def filtered_html
+        @filtered_html ||= (filter_replies!; document.inner_html)
+      end
+
+      def filtered_text
+        @filtered_text ||= Html2Text.convert(filtered_html)
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
index 3411eb1d9ce..1ad44425c93 100644
--- a/lib/gitlab/email/reply_parser.rb
+++ b/lib/gitlab/email/reply_parser.rb
@@ -23,19 +23,28 @@ module Gitlab
       private
 
       def select_body(message)
-        text = message.text_part if message.multipart?
-        text ||= message if message.content_type !~ /text\/html/
+        if message.multipart?
+          text = message.text_part || message.html_part || message
+        else
+          text = message
+        end
 
         return "" unless text
 
-        text = fix_charset(text)
+        decoded = fix_charset(text)
+
+        return "" unless decoded
 
         # Certain trigger phrases that means we didn't parse correctly
-        if text =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
+        if decoded =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
           return ""
         end
 
-        text
+        if text.content_type =~ %r(text/html)
+          HTMLParser.parse_reply(decoded)
+        else
+          decoded
+        end
       end
 
       # Force encoding to UTF-8 on a Mail::Message or Mail::Part