summary refs log tree commit diff
path: root/lib/gitlab/email
diff options
context:
space:
mode:
authorhttp://jneen.net/ <jneen@jneen.net>2016-11-10 15:24:02 +0900
committerhttp://jneen.net/ <jneen@jneen.net>2016-11-17 11:59:44 +0900
commitf7b0692912e0679a3e2e77b2d1bfaf305fba473a (patch)
tree99592eef10b6d4179d4ebcfb69ff4b0f99a6db34 /lib/gitlab/email
parent60306053a2a14ff881bb56eadd4968bc4d4f48dc (diff)
downloadgitlab-ce-f7b0692912e0679a3e2e77b2d1bfaf305fba473a.tar.gz
add parsing support for incoming html email
Diffstat (limited to 'lib/gitlab/email')
-rw-r--r--lib/gitlab/email/html_parser.rb31
-rw-r--r--lib/gitlab/email/reply_parser.rb19
2 files changed, 45 insertions, 5 deletions
diff --git a/lib/gitlab/email/html_parser.rb b/lib/gitlab/email/html_parser.rb
new file mode 100644
index 00000000000..51d8edb1b13
--- /dev/null
+++ b/lib/gitlab/email/html_parser.rb
@@ -0,0 +1,31 @@
+module Gitlab
+ module Email
+ class HTMLParser
+ def self.parse_reply(raw_body)
+ new(raw_body).filtered_text
+ end
+
+ attr_reader :raw_body
+ def initialize(raw_body)
+ @raw_body = raw_body
+ end
+
+ def document
+ @document ||= Nokogiri::HTML(raw_body)
+ end
+
+ def filter_replies!
+ document.xpath('//blockquote').each { |n| n.replace('&gt; ') }
+ document.xpath('//table').each { |n| n.remove }
+ end
+
+ def filtered_html
+ @filtered_html ||= (filter_replies!; document.inner_html)
+ end
+
+ def filtered_text
+ @filtered_text ||= Html2Text.convert(filtered_html)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
index 3411eb1d9ce..1ad44425c93 100644
--- a/lib/gitlab/email/reply_parser.rb
+++ b/lib/gitlab/email/reply_parser.rb
@@ -23,19 +23,28 @@ module Gitlab
private
def select_body(message)
- text = message.text_part if message.multipart?
- text ||= message if message.content_type !~ /text\/html/
+ if message.multipart?
+ text = message.text_part || message.html_part || message
+ else
+ text = message
+ end
return "" unless text
- text = fix_charset(text)
+ decoded = fix_charset(text)
+
+ return "" unless decoded
# Certain trigger phrases that mean we didn't parse correctly
- if text =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
+ if decoded =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
return ""
end
- text
+ if text.content_type =~ %r(text/html)
+ HTMLParser.parse_reply(decoded)
+ else
+ decoded
+ end
end
# Force encoding to UTF-8 on a Mail::Message or Mail::Part