summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/gitlab/email/html_parser.rb 31
-rw-r--r-- lib/gitlab/email/reply_parser.rb 19
2 files changed, 45 insertions, 5 deletions
diff --git a/lib/gitlab/email/html_parser.rb b/lib/gitlab/email/html_parser.rb
new file mode 100644
index 00000000000..51d8edb1b13
--- /dev/null
+++ b/lib/gitlab/email/html_parser.rb
@@ -0,0 +1,31 @@
+module Gitlab
+ module Email
+ class HTMLParser
+ def self.parse_reply(raw_body)
+ new(raw_body).filtered_text
+ end
+
+ attr_reader :raw_body
+ def initialize(raw_body)
+ @raw_body = raw_body
+ end
+
+ def document
+ @document ||= Nokogiri::HTML(raw_body)
+ end
+
+ def filter_replies!
+ document.xpath('//blockquote').each { |n| n.replace('> ') }
+ document.xpath('//table').each { |n| n.remove }
+ end
+
+ def filtered_html
+ @filtered_html ||= (filter_replies!; document.inner_html)
+ end
+
+ def filtered_text
+ @filtered_text ||= Html2Text.convert(filtered_html)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
index 3411eb1d9ce..1ad44425c93 100644
--- a/lib/gitlab/email/reply_parser.rb
+++ b/lib/gitlab/email/reply_parser.rb
@@ -23,19 +23,28 @@ module Gitlab
private
def select_body(message)
- text = message.text_part if message.multipart?
- text ||= message if message.content_type !~ /text\/html/
+ if message.multipart?
+ text = message.text_part || message.html_part || message
+ else
+ text = message
+ end
return "" unless text
- text = fix_charset(text)
+ decoded = fix_charset(text)
+
+ return "" unless decoded
 # Certain trigger phrases that mean we didn't parse correctly
- if text =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
+ if decoded =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
return ""
end
- text
+ if text.content_type =~ %r(text/html)
+ HTMLParser.parse_reply(decoded)
+ else
+ decoded
+ end
end
# Force encoding to UTF-8 on a Mail::Message or Mail::Part