summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2015-08-20 11:05:06 -0700
committerDouwe Maan <douwe@gitlab.com>2015-08-20 11:05:06 -0700
commite9972efc2f3d730e989907585dd1438c517a0bba (patch)
tree7a38f9638cc50813d16d55f9276db98dd7cb041c /lib
parent3ff9d5c64cef8bf8daed5e253e388545987fb945 (diff)
downloadgitlab-ce-e9972efc2f3d730e989907585dd1438c517a0bba.tar.gz
Extract ReplyParser and AttachmentUploader from Receiver.
Diffstat (limited to 'lib')
-rw-r--r--lib/gitlab/email/attachment_uploader.rb35
-rw-r--r--lib/gitlab/email/html_cleaner.rb135
-rw-r--r--lib/gitlab/email/receiver.rb101
-rw-r--r--lib/gitlab/email/reply_parser.rb91
-rw-r--r--lib/gitlab/email_html_cleaner.rb133
-rw-r--r--lib/gitlab/email_receiver.rb192
6 files changed, 362 insertions, 325 deletions
diff --git a/lib/gitlab/email/attachment_uploader.rb b/lib/gitlab/email/attachment_uploader.rb
new file mode 100644
index 00000000000..0c0f50f2751
--- /dev/null
+++ b/lib/gitlab/email/attachment_uploader.rb
@@ -0,0 +1,35 @@
+module Gitlab
+  module Email
+    # Uploads every attachment of an incoming email message to a project and
+    # collects the results returned by the upload service.
+    #
+    # This is a class rather than a module: it defines #initialize and
+    # Receiver#add_attachments builds it with AttachmentUploader.new(message),
+    # which is not possible for a module.
+    class AttachmentUploader
+      attr_accessor :message
+
+      # message - the parsed email; must respond to #attachments, where each
+      #           attachment responds to #body (with #decoded), #filename and
+      #           #content_type.
+      def initialize(message)
+        @message = message
+      end
+
+      # Uploads each attachment of the message to the given project.
+      #
+      # project - handed unchanged to Projects::UploadService.
+      #
+      # Returns an Array holding every truthy value returned by
+      # Projects::UploadService#execute, in attachment order. Attachments for
+      # which the service returns nil/false are skipped.
+      def execute(project)
+        attachments = []
+
+        message.attachments.each do |attachment|
+          tmp = Tempfile.new("gitlab-email-attachment")
+          begin
+            # The decoded body is written through a separate binary-mode handle
+            # on the temp file's path; the Tempfile itself is what gets passed
+            # to the upload service.
+            File.open(tmp.path, "w+b") { |f| f.write attachment.body.decoded }
+
+            file = {
+              tempfile: tmp,
+              filename: attachment.filename,
+              content_type: attachment.content_type
+            }
+
+            link = ::Projects::UploadService.new(project, file).execute
+            attachments << link if link
+          ensure
+            # Close and unlink the temp file even when the upload raised.
+            tmp.close!
+          end
+        end
+
+        attachments
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/email/html_cleaner.rb b/lib/gitlab/email/html_cleaner.rb
new file mode 100644
index 00000000000..e1ae9eee56c
--- /dev/null
+++ b/lib/gitlab/email/html_cleaner.rb
@@ -0,0 +1,135 @@
+# Taken mostly from Discourse's Email::HtmlCleaner
+module Gitlab
+  module Email
+    # HtmlCleaner cleans up the extremely dirty HTML that many email clients
+    # generate by stripping out any excess divs or spans, removing styling in
+    # the process (which also makes the html more suitable to be parsed as
+    # Markdown).
+    class HtmlCleaner
+      # Elements to hoist all children out of
+      HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td).freeze
+      # Node types to always delete
+      HTML_DELETE_ELEMENT_TYPES = [
+        Nokogiri::XML::Node::DTD_NODE,
+        Nokogiri::XML::Node::COMMENT_NODE,
+      ].freeze
+
+      # Private variables:
+      #   @doc - nokogiri document
+      #   @out - same as @doc, but only if trimming has occurred
+      #
+      # html - Either an HTML string (parsed with Nokogiri::HTML) or an
+      #        already-parsed document, which is used as-is and modified in
+      #        place by #output_document.
+      def initialize(html)
+        if html.is_a?(String)
+          @doc = Nokogiri::HTML(html)
+        else
+          @doc = html
+        end
+      end
+
+      class << self
+        # HtmlCleaner.trim(inp, opts={})
+        #
+        # Arguments:
+        #   inp - Either a HTML string or a Nokogiri document.
+        # Options:
+        #   :return => :doc, :string
+        #     Specify the desired return type.
+        #     Defaults to the type of the input.
+        #     A value of :string is equivalent to calling get_document_text()
+        #     on the returned document.
+        def trim(inp, opts={})
+          cleaner = HtmlCleaner.new(inp)
+
+          # Read the option without writing the default back into the caller's
+          # hash.
+          return_type = opts[:return] || (inp.is_a?(String) ? :string : :doc)
+
+          if return_type == :string
+            cleaner.output_html
+          else
+            cleaner.output_document
+          end
+        end
+
+        # HtmlCleaner.get_document_text(doc)
+        #
+        # Get the body portion of the document, including html, as a string.
+        # Falls back to the inner HTML of the whole document when it has no
+        # <body> element.
+        def get_document_text(doc)
+          body = doc.xpath('//body')
+
+          # xpath returns a NodeSet, which is truthy even when it is empty, so
+          # emptiness has to be tested explicitly for the fallback to be
+          # reachable.
+          if body.empty?
+            doc.inner_html
+          else
+            body.inner_html
+          end
+        end
+      end
+
+      # Trims the document (once; the result is memoized in @out) and returns
+      # it. The document given to the constructor is modified in place.
+      def output_document
+        @out ||= begin
+          doc = @doc
+          trim_process_node(doc)
+          add_newlines(doc)
+          doc
+        end
+      end
+
+      # Returns the trimmed document's body as an HTML string.
+      def output_html
+        HtmlCleaner.get_document_text(output_document)
+      end
+
+      private
+
+      def add_newlines(doc)
+        # Replace <br> tags with a markdown \n
+        doc.xpath('//br').each do |br|
+          br.replace(new_linebreak_node(doc, 2))
+        end
+        # Surround <p> tags with newlines, to help with line-wise postprocessing
+        # and ensure markdown paragraphs
+        doc.xpath('//p').each do |para|
+          para.before(new_linebreak_node(doc))
+          para.after(new_linebreak_node(doc, 2))
+        end
+      end
+
+      # Builds a text node owned by doc that holds `count` newline characters.
+      def new_linebreak_node(doc, count=1)
+        Nokogiri::XML::Text.new("\n" * count, doc)
+      end
+
+      # Recursively cleans a node: hoistable elements are replaced by their
+      # (recursively cleaned) children, deletable nodes are removed, and any
+      # other node has its children cleaned. Returns the node it was given.
+      def trim_process_node(node)
+        if should_hoist?(node)
+          hoisted = trim_hoist_element(node)
+          hoisted.each { |child| trim_process_node(child) }
+        elsif should_delete?(node)
+          node.remove
+        else
+          children = node.children
+          children.each { |child| trim_process_node(child) } if children
+        end
+
+        node
+      end
+
+      # Moves every child of element to just before element, in order, then
+      # removes the emptied element. Returns the Array of moved children.
+      def trim_hoist_element(element)
+        hoisted = []
+        element.children.each do |child|
+          element.before(child)
+          hoisted << child
+        end
+        element.remove
+        hoisted
+      end
+
+      # True for element nodes whose name is listed in HTML_HOIST_ELEMENTS.
+      def should_hoist?(node)
+        return false unless node.element?
+        HTML_HOIST_ELEMENTS.include? node.name
+      end
+
+      # True for DTD and comment nodes, <head> elements and whitespace-only
+      # text nodes.
+      def should_delete?(node)
+        return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
+        return true if node.element? && node.name == 'head'
+        return true if node.text? && node.text.strip.blank?
+
+        false
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/email/receiver.rb b/lib/gitlab/email/receiver.rb
new file mode 100644
index 00000000000..c46fce6afe2
--- /dev/null
+++ b/lib/gitlab/email/receiver.rb
@@ -0,0 +1,101 @@
+# Inspired in great part by Discourse's Email::Receiver
+module Gitlab
+  module Email
+    # Turns a raw incoming reply email into a note on the noteable that the
+    # original notification was sent for.
+    class Receiver
+      class ProcessingError < StandardError; end
+      class EmailUnparsableError < ProcessingError; end
+      class EmptyEmailError < ProcessingError; end
+      class UserNotFoundError < ProcessingError; end
+      class UserNotAuthorizedError < ProcessingError; end
+      class NoteableNotFoundError < ProcessingError; end
+      class AutoGeneratedEmailError < ProcessingError; end
+      class SentNotificationNotFound < ProcessingError; end
+      class InvalidNote < ProcessingError; end
+
+      # raw - the raw email source handed to Mail::Message.new.
+      def initialize(raw)
+        @raw = raw
+      end
+
+      # Returns the parsed (memoized) Mail::Message.
+      #
+      # Raises EmailUnparsableError when parsing fails with an encoding error.
+      def message
+        @message ||= Mail::Message.new(@raw)
+      rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError => e
+        raise EmailUnparsableError, e
+      end
+
+      # Validates the email and creates a note from its reply text plus links
+      # to any uploaded attachments.
+      #
+      # Raises one of the ProcessingError subclasses declared above when the
+      # email is empty, auto-generated, cannot be matched to a sent
+      # notification, the recipient is missing or not allowed to create a note,
+      # the noteable is gone, or the note fails to persist.
+      def execute
+        # Checked before anything else: the notification lookup below parses
+        # the email and walks its recipients, which is pointless for a blank
+        # payload.
+        raise EmptyEmailError if @raw.blank?
+
+        raise SentNotificationNotFound unless sent_notification
+
+        raise AutoGeneratedEmailError if message.header.to_s =~ /auto-(generated|replied)/
+
+        author = sent_notification.recipient
+
+        raise UserNotFoundError unless author
+
+        project = sent_notification.project
+
+        raise UserNotAuthorizedError unless author.can?(:create_note, project)
+
+        raise NoteableNotFoundError unless sent_notification.noteable
+
+        reply = ReplyParser.new(message).execute.strip
+
+        raise EmptyEmailError if reply.blank?
+
+        reply = add_attachments(reply)
+
+        note = create_note(reply)
+
+        unless note.persisted?
+          # Named error_message so it does not shadow the #message method.
+          error_message = "The comment could not be created for the following reasons:"
+          note.errors.full_messages.each do |error|
+            error_message << "\n\n- #{error}"
+          end
+
+          raise InvalidNote, error_message
+        end
+      end
+
+      private
+
+      # Returns the first reply key that can be extracted from one of the
+      # email's To addresses, or nil when none yields a key.
+      def reply_key
+        # Array() guards against #to returning nil.
+        # NOTE(review): presumably the case for mail without a To header - confirm.
+        Array(message.to).each do |address|
+          key = Gitlab::ReplyByEmail.reply_key_from_address(address)
+          return key if key
+        end
+
+        nil
+      end
+
+      # Returns the SentNotification for the email's reply key, or nil when the
+      # email carries no reply key. Memoized (including a nil result) because
+      # #execute and #create_note consult it many times.
+      def sent_notification
+        return @sent_notification if defined?(@sent_notification)
+
+        key = reply_key
+        @sent_notification = key ? SentNotification.for(key) : nil
+      end
+
+      # Uploads the email's attachments to the notification's project and
+      # appends one Markdown link per upload to the reply ("!"-prefixed for
+      # images).
+      #
+      # Returns the reply String, which is also modified in place.
+      def add_attachments(reply)
+        attachments = AttachmentUploader.new(message).execute(sent_notification.project)
+
+        attachments.each do |link|
+          text = "[#{link[:alt]}](#{link[:url]})"
+          text.prepend("!") if link[:is_image]
+
+          reply << "\n\n#{text}"
+        end
+
+        # Return the text itself; #each would return the attachments array.
+        reply
+      end
+
+      # Creates the note through Notes::CreateService on behalf of the
+      # notification's recipient and returns the service's result.
+      def create_note(reply)
+        Notes::CreateService.new(
+          sent_notification.project,
+          sent_notification.recipient,
+          note: reply,
+          noteable_type: sent_notification.noteable_type,
+          noteable_id: sent_notification.noteable_id,
+          commit_id: sent_notification.commit_id
+        ).execute
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
new file mode 100644
index 00000000000..6ceb755968c
--- /dev/null
+++ b/lib/gitlab/email/reply_parser.rb
@@ -0,0 +1,91 @@
+# Inspired in great part by Discourse's Email::Receiver
+module Gitlab
+  module Email
+    # Extracts the text a user actually wrote from a reply email, dropping the
+    # quoted original message and reply headers.
+    class ReplyParser
+      attr_accessor :message
+
+      # message - the parsed email (a Mail::Message, judging by #fix_charset).
+      def initialize(message)
+        @message = message
+      end
+
+      # Returns the reply text as a UTF-8 String. The result is an empty String
+      # when no usable body could be selected.
+      def execute
+        body = select_body(message)
+
+        # Remember the encoding so it can be re-applied to the parsed result.
+        encoding = body.encoding
+
+        body = discourse_email_trimmer(body)
+
+        body = EmailReplyParser.parse_reply(body)
+
+        body.force_encoding(encoding).encode("UTF-8")
+      end
+
+      private
+
+      # Picks the body to parse: the plain text part when there is one,
+      # otherwise the cleaned HTML, otherwise the raw message body.
+      #
+      # Always returns a String; "" when the body could not be decoded or looks
+      # like unparsed MIME.
+      def select_body(message)
+        html = nil
+        text = nil
+
+        if message.multipart?
+          html = fix_charset(message.html_part)
+          text = fix_charset(message.text_part)
+        elsif message.content_type =~ /text\/html/
+          html = fix_charset(message)
+        end
+
+        # prefer plain text
+        return text if text
+
+        if html
+          body = HtmlCleaner.new(html).output_html
+        else
+          body = fix_charset(message)
+        end
+
+        # fix_charset returns nil when decoding fails; #execute needs a String.
+        return "" if body.nil?
+
+        # Certain trigger phrases that means we didn't parse correctly
+        if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
+          return ""
+        end
+
+        body
+      end
+
+      # Force encoding to UTF-8 on a Mail::Message or Mail::Part
+      #
+      # Returns nil for a nil object and, as a deliberate best effort, when
+      # decoding or transcoding raises.
+      def fix_charset(object)
+        return nil if object.nil?
+
+        if object.charset
+          object.body.decoded.force_encoding(object.charset.gsub(/utf8/i, "UTF-8")).encode("UTF-8").to_s
+        else
+          object.body.to_s
+        end
+      rescue
+        nil
+      end
+
+      # Spelled out as an explicit array: a %w literal splits on whitespace,
+      # which turned "Reply To" into "Reply" plus a second "To" (so "To" was
+      # counted twice by the same-line check below and "Reply To:" never
+      # matched as a header).
+      REPLYING_HEADER_LABELS = ["From", "Sent", "To", "Subject", "Reply To", "Cc", "Bcc", "Date"].freeze
+      REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })
+
+      # Cuts the body off at the first line that looks like the start of the
+      # quoted original message and returns the stripped text before it.
+      #
+      # The first line is always kept, because range_end starts at 0.
+      def discourse_email_trimmer(body)
+        lines = body.scrub.lines.to_a
+        range_end = 0
+
+        lines.each_with_index do |l, idx|
+          # This one might be controversial but so many reply lines have years, times and end with a colon.
+          # Let's try it and see how well it works.
+          break if (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) ||
+                   (l =~ /On \w+ \d+,? \d+,?.*wrote:/)
+
+          # Headers on subsequent lines
+          break if (0..2).all? { |off| lines[idx+off] =~ REPLYING_HEADER_REGEX }
+          # Headers on the same line
+          break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3
+
+          range_end = idx
+        end
+
+        lines[0..range_end].join.strip
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/email_html_cleaner.rb b/lib/gitlab/email_html_cleaner.rb
deleted file mode 100644
index 6d7a17fe87c..00000000000
--- a/lib/gitlab/email_html_cleaner.rb
+++ /dev/null
@@ -1,133 +0,0 @@
-# Taken mostly from Discourse's Email::HtmlCleaner
-module Gitlab
- # HtmlCleaner cleans up the extremely dirty HTML that many email clients
- # generate by stripping out any excess divs or spans, removing styling in
- # the process (which also makes the html more suitable to be parsed as
- # Markdown).
- class EmailHtmlCleaner
- # Elements to hoist all children out of
- HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
- # Node types to always delete
- HTML_DELETE_ELEMENT_TYPES = [
- Nokogiri::XML::Node::DTD_NODE,
- Nokogiri::XML::Node::COMMENT_NODE,
- ]
-
- # Private variables:
- # @doc - nokogiri document
- # @out - same as @doc, but only if trimming has occured
- def initialize(html)
- if html.is_a?(String)
- @doc = Nokogiri::HTML(html)
- else
- @doc = html
- end
- end
-
- class << self
- # EmailHtmlCleaner.trim(inp, opts={})
- #
- # Arguments:
- # inp - Either a HTML string or a Nokogiri document.
- # Options:
- # :return => :doc, :string
- # Specify the desired return type.
- # Defaults to the type of the input.
- # A value of :string is equivalent to calling get_document_text()
- # on the returned document.
- def trim(inp, opts={})
- cleaner = EmailHtmlCleaner.new(inp)
-
- opts[:return] ||= (inp.is_a?(String) ? :string : :doc)
-
- if opts[:return] == :string
- cleaner.output_html
- else
- cleaner.output_document
- end
- end
-
- # EmailHtmlCleaner.get_document_text(doc)
- #
- # Get the body portion of the document, including html, as a string.
- def get_document_text(doc)
- body = doc.xpath('//body')
- if body
- body.inner_html
- else
- doc.inner_html
- end
- end
- end
-
- def output_document
- @out ||= begin
- doc = @doc
- trim_process_node doc
- add_newlines doc
- doc
- end
- end
-
- def output_html
- EmailHtmlCleaner.get_document_text(output_document)
- end
-
- private
-
- def add_newlines(doc)
- # Replace <br> tags with a markdown \n
- doc.xpath('//br').each do |br|
- br.replace(new_linebreak_node doc, 2)
- end
- # Surround <p> tags with newlines, to help with line-wise postprocessing
- # and ensure markdown paragraphs
- doc.xpath('//p').each do |p|
- p.before(new_linebreak_node doc)
- p.after(new_linebreak_node doc, 2)
- end
- end
-
- def new_linebreak_node(doc, count=1)
- Nokogiri::XML::Text.new("\n" * count, doc)
- end
-
- def trim_process_node(node)
- if should_hoist?(node)
- hoisted = trim_hoist_element node
- hoisted.each { |child| trim_process_node child }
- elsif should_delete?(node)
- node.remove
- else
- if children = node.children
- children.each { |child| trim_process_node child }
- end
- end
-
- node
- end
-
- def trim_hoist_element(element)
- hoisted = []
- element.children.each do |child|
- element.before(child)
- hoisted << child
- end
- element.remove
- hoisted
- end
-
- def should_hoist?(node)
- return false unless node.element?
- HTML_HOIST_ELEMENTS.include? node.name
- end
-
- def should_delete?(node)
- return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
- return true if node.element? && node.name == 'head'
- return true if node.text? && node.text.strip.blank?
-
- false
- end
- end
-end
diff --git a/lib/gitlab/email_receiver.rb b/lib/gitlab/email_receiver.rb
deleted file mode 100644
index 3c1f346c0cf..00000000000
--- a/lib/gitlab/email_receiver.rb
+++ /dev/null
@@ -1,192 +0,0 @@
-# Inspired in great part by Discourse's Email::Receiver
-module Gitlab
- class EmailReceiver
- class ProcessingError < StandardError; end
- class EmailUnparsableError < ProcessingError; end
- class EmptyEmailError < ProcessingError; end
- class UserNotFoundError < ProcessingError; end
- class UserNotAuthorizedError < ProcessingError; end
- class NoteableNotFoundError < ProcessingError; end
- class AutoGeneratedEmailError < ProcessingError; end
- class SentNotificationNotFound < ProcessingError; end
- class InvalidNote < ProcessingError; end
-
- def initialize(raw)
- @raw = raw
- end
-
- def message
- @message ||= Mail::Message.new(@raw)
- rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError => e
- raise EmailUnparsableError, e
- end
-
- def execute
- raise SentNotificationNotFound unless sent_notification
-
- raise EmptyEmailError if @raw.blank?
-
- raise AutoGeneratedEmailError if message.header.to_s =~ /auto-(generated|replied)/
-
- author = sent_notification.recipient
-
- raise UserNotFoundError unless author
-
- project = sent_notification.project
-
- raise UserNotAuthorizedError unless author.can?(:create_note, project)
-
- raise NoteableNotFoundError unless sent_notification.noteable
-
- body = parse_body(message)
-
- upload_attachments.each do |link|
- body << "\n\n#{link}"
- end
-
- note = Notes::CreateService.new(
- project,
- author,
- note: body,
- noteable_type: sent_notification.noteable_type,
- noteable_id: sent_notification.noteable_id,
- commit_id: sent_notification.commit_id
- ).execute
-
- unless note.persisted?
- message = "The comment could not be created for the following reasons:"
- note.errors.full_messages.each do |error|
- message << "\n\n- #{error}"
- end
- raise InvalidNote, message
- end
- end
-
- def parse_body(message)
- body = select_body(message)
-
- encoding = body.encoding
- raise EmptyEmailError if body.strip.blank?
-
- body = discourse_email_trimmer(body)
- raise EmptyEmailError if body.strip.blank?
-
- body = EmailReplyParser.parse_reply(body)
- raise EmptyEmailError if body.strip.blank?
-
- body.force_encoding(encoding).encode("UTF-8")
- end
-
- private
-
- def reply_key
- reply_key = nil
- message.to.each do |address|
- reply_key = Gitlab::ReplyByEmail.reply_key_from_address(address)
- break if reply_key
- end
-
- reply_key
- end
-
- def sent_notification
- return nil unless reply_key
-
- SentNotification.for(reply_key)
- end
-
- def select_body(message)
- html = nil
- text = nil
-
- if message.multipart?
- html = fix_charset(message.html_part)
- text = fix_charset(message.text_part)
- elsif message.content_type =~ /text\/html/
- html = fix_charset(message)
- end
-
- # prefer plain text
- return text if text
-
- if html
- body = EmailHtmlCleaner.new(html).output_html
- else
- body = fix_charset(message)
- end
-
- # Certain trigger phrases that means we didn't parse correctly
- if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
- raise EmptyEmailError
- end
-
- body
- end
-
- # Force encoding to UTF-8 on a Mail::Message or Mail::Part
- def fix_charset(object)
- return nil if object.nil?
-
- if object.charset
- object.body.decoded.force_encoding(object.charset.gsub(/utf8/i, "UTF-8")).encode("UTF-8").to_s
- else
- object.body.to_s
- end
- rescue
- nil
- end
-
- REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date)
- REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })
-
- def discourse_email_trimmer(body)
- lines = body.scrub.lines.to_a
- range_end = 0
-
- lines.each_with_index do |l, idx|
- # This one might be controversial but so many reply lines have years, times and end with a colon.
- # Let's try it and see how well it works.
- break if (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) ||
- (l =~ /On \w+ \d+,? \d+,?.*wrote:/)
-
- # Headers on subsequent lines
- break if (0..2).all? { |off| lines[idx+off] =~ REPLYING_HEADER_REGEX }
- # Headers on the same line
- break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3
-
- range_end = idx
- end
-
- lines[0..range_end].join.strip
- end
-
- def upload_attachments
- attachments = []
-
- message.attachments.each do |attachment|
- tmp = Tempfile.new("gitlab-email-attachment")
- begin
- File.open(tmp.path, "w+b") { |f| f.write attachment.body.decoded }
-
- file = {
- tempfile: tmp,
- filename: attachment.filename,
- content_type: attachment.content_type
- }
-
- link = ::Projects::UploadService.new(sent_notification.project, file).execute
- if link
- text = "[#{link[:alt]}](#{link[:url]})"
- text.prepend("!") if link[:is_image]
-
- attachments << text
- end
- ensure
- tmp.close!
- end
- end
-
- attachments
- end
- end
-end