Extract ReplyParser and AttachmentUploader from Receiver.

author: Douwe Maan <douwe@gitlab.com> 2015-08-20 11:05:06 -0700
committer: Douwe Maan <douwe@gitlab.com> 2015-08-20 11:05:06 -0700
commit: e9972efc2f3d730e989907585dd1438c517a0bba (patch)
tree: 7a38f9638cc50813d16d55f9276db98dd7cb041c /lib
parent: 3ff9d5c64cef8bf8daed5e253e388545987fb945 (diff)
download: gitlab-ce-e9972efc2f3d730e989907585dd1438c517a0bba.tar.gz
6 files changed, 362 insertions, 325 deletions
diff --git a/lib/gitlab/email/attachment_uploader.rb b/lib/gitlab/email/attachment_uploader.rb
new file mode 100644
index 00000000000..0c0f50f2751
--- /dev/null
+++ b/lib/gitlab/email/attachment_uploader.rb
@@ -0,0 +1,50 @@
+module Gitlab
+  module Email
+    # Uploads the attachments of an incoming email to a project so that they
+    # can be linked from the note created for the reply.
+    #
+    # This has to be a class, not a module: it keeps per-message state and
+    # callers instantiate it with AttachmentUploader.new(message).
+    class AttachmentUploader
+      attr_accessor :message
+
+      # message - the parsed email; must respond to #attachments, each
+      #           attachment responding to #body, #filename and #content_type.
+      def initialize(message)
+        @message = message
+      end
+
+      # Uploads every attachment of the message to the given project.
+      #
+      # project - the project handed to Projects::UploadService.
+      #
+      # Returns an Array with the results of Projects::UploadService#execute,
+      # leaving out attachments for which the service returned nothing.
+      def execute(project)
+        attachments = []
+
+        message.attachments.each do |attachment|
+          tmp = Tempfile.new("gitlab-email-attachment")
+          begin
+            # Write in binary mode so the decoded bytes are stored untouched.
+            File.open(tmp.path, "w+b") { |f| f.write attachment.body.decoded }
+
+            file = {
+              tempfile:     tmp,
+              filename:     attachment.filename,
+              content_type: attachment.content_type
+            }
+
+            link = ::Projects::UploadService.new(project, file).execute
+            attachments << link if link
+          ensure
+            # Always close and unlink the tempfile, even if the upload raised.
+            tmp.close!
+          end
+        end
+
+        attachments
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/email/html_cleaner.rb b/lib/gitlab/email/html_cleaner.rb
new file mode 100644
index 00000000000..e1ae9eee56c
--- /dev/null
+++ b/lib/gitlab/email/html_cleaner.rb
@@ -0,0 +1,157 @@
+# Taken mostly from Discourse's Email::HtmlCleaner
+module Gitlab
+  module Email
+    # HtmlCleaner cleans up the extremely dirty HTML that many email clients
+    # generate by stripping out any excess divs or spans, removing styling in
+    # the process (which also makes the html more suitable to be parsed as
+    # Markdown).
+    class HtmlCleaner
+      # Elements to hoist all children out of
+      HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td).freeze
+      # Node types to always delete
+      HTML_DELETE_ELEMENT_TYPES = [
+        Nokogiri::XML::Node::DTD_NODE,
+        Nokogiri::XML::Node::COMMENT_NODE
+      ].freeze
+
+      # Private variables:
+      #   @doc - nokogiri document
+      #   @out - same as @doc, but only if trimming has occurred
+      #
+      # html - Either a HTML string, which is parsed with Nokogiri::HTML, or
+      #        an already parsed Nokogiri document, which is used as is.
+      def initialize(html)
+        if html.is_a?(String)
+          @doc = Nokogiri::HTML(html)
+        else
+          @doc = html
+        end
+      end
+
+      class << self
+        # HtmlCleaner.trim(inp, opts={})
+        #
+        # Arguments:
+        #   inp - Either a HTML string or a Nokogiri document.
+        # Options:
+        #   :return => :doc, :string
+        #     Specify the desired return type.
+        #     Defaults to the type of the input.
+        #     A value of :string is equivalent to calling get_document_text()
+        #     on the returned document.
+        def trim(inp, opts={})
+          cleaner = HtmlCleaner.new(inp)
+
+          # Resolve the default locally rather than writing it back into
+          # opts, so the caller's hash is left untouched.
+          return_type = opts[:return] || (inp.is_a?(String) ? :string : :doc)
+
+          if return_type == :string
+            cleaner.output_html
+          else
+            cleaner.output_document
+          end
+        end
+
+        # HtmlCleaner.get_document_text(doc)
+        #
+        # Get the body portion of the document, including html, as a string.
+        # Falls back to the whole document when it has no <body> element.
+        def get_document_text(doc)
+          body = doc.xpath('//body')
+
+          # xpath returns a NodeSet, which is truthy even when it is empty,
+          # so emptiness has to be checked explicitly.
+          if body.empty?
+            doc.inner_html
+          else
+            body.inner_html
+          end
+        end
+      end
+
+      # Returns the cleaned document. The cleanup modifies @doc in place and
+      # runs only once; later calls return the memoized result.
+      def output_document
+        @out ||= begin
+          doc = @doc
+          trim_process_node doc
+          add_newlines doc
+          doc
+        end
+      end
+
+      # Returns the cleaned document as a string, see get_document_text.
+      def output_html
+        HtmlCleaner.get_document_text(output_document)
+      end
+
+      private
+
+      def add_newlines(doc)
+        # Replace <br> tags with two newlines
+        doc.xpath('//br').each do |br|
+          br.replace(new_linebreak_node(doc, 2))
+        end
+        # Surround <p> tags with newlines, to help with line-wise postprocessing
+        # and ensure markdown paragraphs
+        doc.xpath('//p').each do |p|
+          p.before(new_linebreak_node(doc))
+          p.after(new_linebreak_node(doc, 2))
+        end
+      end
+
+      # Builds a text node made of `count` newline characters.
+      def new_linebreak_node(doc, count=1)
+        Nokogiri::XML::Text.new("\n" * count, doc)
+      end
+
+      # Recursively cleans a node: an element that should be hoisted is
+      # replaced by its children (which are then cleaned themselves), a node
+      # that should be deleted is removed, and for any other node only its
+      # children are cleaned. Returns the node that was passed in.
+      def trim_process_node(node)
+        if should_hoist?(node)
+          hoisted = trim_hoist_element node
+          hoisted.each { |child| trim_process_node child }
+        elsif should_delete?(node)
+          node.remove
+        else
+          if children = node.children
+            children.each { |child| trim_process_node child }
+          end
+        end
+
+        node
+      end
+
+      # Moves every child of the element in front of it, removes the element
+      # itself and returns the moved children.
+      def trim_hoist_element(element)
+        hoisted = []
+        element.children.each do |child|
+          element.before(child)
+          hoisted << child
+        end
+        element.remove
+        hoisted
+      end
+
+      # Only elements listed in HTML_HOIST_ELEMENTS are hoisted.
+      def should_hoist?(node)
+        return false unless node.element?
+        HTML_HOIST_ELEMENTS.include? node.name
+      end
+
+      # Deleted are: nodes of a type in HTML_DELETE_ELEMENT_TYPES, <head>
+      # elements and text nodes that contain nothing but whitespace.
+      def should_delete?(node)
+        return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
+        return true if node.element? && node.name == 'head'
+        return true if node.text? && node.text.strip.blank?
+
+        false
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/email/receiver.rb b/lib/gitlab/email/receiver.rb
new file mode 100644
index 00000000000..c46fce6afe2
--- /dev/null
+++ b/lib/gitlab/email/receiver.rb
@@ -0,0 +1,127 @@
+# Inspired in great part by Discourse's Email::Receiver
+module Gitlab
+  module Email
+    # Processes a reply sent by email: validates it and creates a note on
+    # the noteable of the notification that was replied to.
+    class Receiver
+      class ProcessingError < StandardError; end
+      class EmailUnparsableError < ProcessingError; end
+      class EmptyEmailError < ProcessingError; end
+      class UserNotFoundError < ProcessingError; end
+      class UserNotAuthorizedError < ProcessingError; end
+      class NoteableNotFoundError < ProcessingError; end
+      class AutoGeneratedEmailError < ProcessingError; end
+      class SentNotificationNotFound < ProcessingError; end
+      class InvalidNote < ProcessingError; end
+
+      # raw - the raw source of the received email.
+      def initialize(raw)
+        @raw = raw
+      end
+
+      # The raw email parsed into a Mail::Message (memoized).
+      #
+      # Raises EmailUnparsableError when parsing hits an encoding error.
+      def message
+        @message ||= Mail::Message.new(@raw)
+      rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError => e
+        raise EmailUnparsableError, e
+      end
+
+      # Validates the email and creates a note from the reply it contains.
+      #
+      # Raises one of the ProcessingError subclasses above when the email is
+      # rejected or the note cannot be saved.
+      def execute
+        # Check for an empty email before anything else: looking up the sent
+        # notification already requires a parsed message.
+        raise EmptyEmailError if @raw.blank?
+
+        raise SentNotificationNotFound unless sent_notification
+
+        raise AutoGeneratedEmailError if message.header.to_s =~ /auto-(generated|replied)/
+
+        author = sent_notification.recipient
+
+        raise UserNotFoundError unless author
+
+        project = sent_notification.project
+
+        raise UserNotAuthorizedError unless author.can?(:create_note, project)
+
+        raise NoteableNotFoundError unless sent_notification.noteable
+
+        reply = ReplyParser.new(message).execute.strip
+
+        raise EmptyEmailError if reply.blank?
+
+        reply = add_attachments(reply)
+
+        note = create_note(reply)
+
+        unless note.persisted?
+          # Not called `message`, which would shadow the #message method.
+          error_message = "The comment could not be created for the following reasons:"
+          note.errors.full_messages.each do |error|
+            error_message << "\n\n- #{error}"
+          end
+
+          raise InvalidNote, error_message
+        end
+      end
+
+      private
+
+      # The reply key of the first recipient address that yields one, or nil
+      # when none does.
+      def reply_key
+        # message.to can be nil (no To header), hence Array().
+        Array(message.to).each do |address|
+          key = Gitlab::ReplyByEmail.reply_key_from_address(address)
+          return key if key
+        end
+
+        nil
+      end
+
+      # The SentNotification the email replies to, or nil when the email
+      # carries no reply key. Memoized, since #execute reads it several times.
+      def sent_notification
+        return @sent_notification if defined?(@sent_notification)
+
+        key = reply_key
+        @sent_notification = key ? SentNotification.for(key) : nil
+      end
+
+      # Uploads the email's attachments to the project and appends a Markdown
+      # link (an image embed for images) for each of them to the reply.
+      #
+      # Returns the reply, which the caller uses as the note body.
+      def add_attachments(reply)
+        attachments = AttachmentUploader.new(message).execute(sent_notification.project)
+
+        attachments.each do |link|
+          text = "[#{link[:alt]}](#{link[:url]})"
+          text.prepend("!") if link[:is_image]
+
+          reply << "\n\n#{text}"
+        end
+
+        reply
+      end
+
+      # Creates the note through Notes::CreateService, authored by the
+      # notification's recipient, and returns the service's result.
+      def create_note(reply)
+        Notes::CreateService.new(
+          sent_notification.project,
+          sent_notification.recipient,
+          note:           reply,
+          noteable_type:  sent_notification.noteable_type,
+          noteable_id:    sent_notification.noteable_id,
+          commit_id:      sent_notification.commit_id
+        ).execute
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
new file mode 100644
index 00000000000..6ceb755968c
--- /dev/null
+++ b/lib/gitlab/email/reply_parser.rb
@@ -0,0 +1,91 @@
+# Inspired in great part by Discourse's Email::Receiver
+module Gitlab
+  module Email
+    class ReplyParser
+      attr_accessor :message
+
+      def initialize(message)
+        @message = message
+      end
+
+      def execute
+        body = select_body(message)
+
+        encoding = body.encoding
+
+        body = discourse_email_trimmer(body)
+
+        body = EmailReplyParser.parse_reply(body)
+
+        body.force_encoding(encoding).encode("UTF-8")
+      end
+
+      private
+
+      def select_body(message)
+        html = nil
+        text = nil
+
+        if message.multipart?
+          html = fix_charset(message.html_part)
+          text = fix_charset(message.text_part)
+        elsif message.content_type =~ /text\/html/
+          html = fix_charset(message)
+        end
+
+        # prefer plain text
+        return text if text
+
+        if html
+          body = HtmlCleaner.new(html).output_html
+        else
+          body = fix_charset(message)
+        end
+
+        # Certain trigger phrases that means we didn't parse correctly
+        if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
+          return ""
+        end
+
+        body
+      end
+
+      # Force encoding to UTF-8 on a Mail::Message or Mail::Part
+      def fix_charset(object)
+        return nil if object.nil?
+
+        if object.charset
+          object.body.decoded.force_encoding(object.charset.gsub(/utf8/i, "UTF-8")).encode("UTF-8").to_s
+        else
+          object.body.to_s
+        end
+      rescue
+        nil
+      end
+
+      REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date)
+      REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })
+
+      def discourse_email_trimmer(body)
+        lines = body.scrub.lines.to_a
+        range_end = 0
+
+        lines.each_with_index do |l, idx|
+          # This one might be controversial but so many reply lines have years, times and end with a colon.
+         # Let's try it and see how well it works.
+         break if (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) ||
+                   (l =~ /On \w+ \d+,? \d+,?.*wrote:/)
+
+          # Headers on subsequent lines
+          break if (0..2).all? { |off| lines[idx+off] =~ REPLYING_HEADER_REGEX }
+          # Headers on the same line
+          break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3
+
+          range_end = idx
+        end
+
+        lines[0..range_end].join.strip
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/email_html_cleaner.rb b/lib/gitlab/email_html_cleaner.rb
deleted file mode 100644
index 6d7a17fe87c..00000000000
--- a/lib/gitlab/email_html_cleaner.rb
+++ /dev/null
@@ -1,133 +0,0 @@
-# Taken mostly from Discourse's Email::HtmlCleaner
-module Gitlab
-  # HtmlCleaner cleans up the extremely dirty HTML that many email clients
-  # generate by stripping out any excess divs or spans, removing styling in
-  # the process (which also makes the html more suitable to be parsed as
-  # Markdown).
-  class EmailHtmlCleaner
-    # Elements to hoist all children out of
-    HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
-    # Node types to always delete
-    HTML_DELETE_ELEMENT_TYPES = [
-      Nokogiri::XML::Node::DTD_NODE,
-      Nokogiri::XML::Node::COMMENT_NODE,
-    ]
-
-    # Private variables:
-    #   @doc - nokogiri document
-    #   @out - same as @doc, but only if trimming has occured
-    def initialize(html)
-      if html.is_a?(String)
-        @doc = Nokogiri::HTML(html)
-      else
-        @doc = html
-      end
-    end
-
-    class << self
-      # EmailHtmlCleaner.trim(inp, opts={})
-      #
-      # Arguments:
-      #   inp - Either a HTML string or a Nokogiri document.
-      # Options:
-      #   :return => :doc, :string
-      #     Specify the desired return type.
-      #     Defaults to the type of the input.
-      #     A value of :string is equivalent to calling get_document_text()
-      #     on the returned document.
-      def trim(inp, opts={})
-        cleaner = EmailHtmlCleaner.new(inp)
-
-        opts[:return] ||= (inp.is_a?(String) ? :string : :doc)
-
-        if opts[:return] == :string
-          cleaner.output_html
-        else
-          cleaner.output_document
-        end
-      end
-
-      # EmailHtmlCleaner.get_document_text(doc)
-      #
-      # Get the body portion of the document, including html, as a string.
-      def get_document_text(doc)
-        body = doc.xpath('//body')
-        if body
-          body.inner_html
-        else
-          doc.inner_html
-        end
-      end
-    end
-
-    def output_document
-      @out ||= begin
-        doc = @doc
-        trim_process_node doc
-        add_newlines doc
-        doc
-      end
-    end
-
-    def output_html
-      EmailHtmlCleaner.get_document_text(output_document)
-    end
-
-    private
-
-    def add_newlines(doc)
-      # Replace <br> tags with a markdown \n
-      doc.xpath('//br').each do |br|
-        br.replace(new_linebreak_node doc, 2)
-      end
-      # Surround <p> tags with newlines, to help with line-wise postprocessing
-      # and ensure markdown paragraphs
-      doc.xpath('//p').each do |p|
-        p.before(new_linebreak_node doc)
-        p.after(new_linebreak_node doc, 2)
-      end
-    end
-
-    def new_linebreak_node(doc, count=1)
-      Nokogiri::XML::Text.new("\n" * count, doc)
-    end
-
-    def trim_process_node(node)
-      if should_hoist?(node)
-        hoisted = trim_hoist_element node
-        hoisted.each { |child| trim_process_node child }
-      elsif should_delete?(node)
-        node.remove
-      else
-        if children = node.children
-          children.each { |child| trim_process_node child }
-        end
-      end
-
-      node
-    end
-
-    def trim_hoist_element(element)
-      hoisted = []
-      element.children.each do |child|
-        element.before(child)
-        hoisted << child
-      end
-      element.remove
-      hoisted
-    end
-
-    def should_hoist?(node)
-      return false unless node.element?
-      HTML_HOIST_ELEMENTS.include? node.name
-    end
-
-    def should_delete?(node)
-      return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
-      return true if node.element? && node.name == 'head'
-      return true if node.text? && node.text.strip.blank?
-
-      false
-    end
-  end
-end
diff --git a/lib/gitlab/email_receiver.rb b/lib/gitlab/email_receiver.rb
deleted file mode 100644
index 3c1f346c0cf..00000000000
--- a/lib/gitlab/email_receiver.rb
+++ /dev/null
@@ -1,192 +0,0 @@
-# Inspired in great part by Discourse's Email::Receiver
-module Gitlab
-  class EmailReceiver
-    class ProcessingError < StandardError; end
-    class EmailUnparsableError < ProcessingError; end
-    class EmptyEmailError < ProcessingError; end
-    class UserNotFoundError < ProcessingError; end
-    class UserNotAuthorizedError < ProcessingError; end
-    class NoteableNotFoundError < ProcessingError; end
-    class AutoGeneratedEmailError < ProcessingError; end
-    class SentNotificationNotFound < ProcessingError; end
-    class InvalidNote < ProcessingError; end
-
-    def initialize(raw)
-      @raw = raw
-    end
-
-    def message
-      @message ||= Mail::Message.new(@raw)
-    rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError => e
-      raise EmailUnparsableError, e
-    end
-
-    def execute
-      raise SentNotificationNotFound unless sent_notification
-
-      raise EmptyEmailError if @raw.blank?
-
-      raise AutoGeneratedEmailError if message.header.to_s =~ /auto-(generated|replied)/
-
-      author = sent_notification.recipient
-
-      raise UserNotFoundError unless author
-
-      project = sent_notification.project
-
-      raise UserNotAuthorizedError unless author.can?(:create_note, project)
-
-      raise NoteableNotFoundError unless sent_notification.noteable
-
-      body = parse_body(message)
-
-      upload_attachments.each do |link|
-        body << "\n\n#{link}"
-      end
-
-      note = Notes::CreateService.new(
-        project,
-        author,
-        note:           body,
-        noteable_type:  sent_notification.noteable_type,
-        noteable_id:    sent_notification.noteable_id,
-        commit_id:      sent_notification.commit_id
-      ).execute
-
-      unless note.persisted?
-        message = "The comment could not be created for the following reasons:"
-        note.errors.full_messages.each do |error|
-          message << "\n\n- #{error}"
-        end
-        raise InvalidNote, message
-      end
-    end
-
-    def parse_body(message)
-      body = select_body(message)
-
-      encoding = body.encoding
-      raise EmptyEmailError if body.strip.blank?
-
-      body = discourse_email_trimmer(body)
-      raise EmptyEmailError if body.strip.blank?
-
-      body = EmailReplyParser.parse_reply(body)
-      raise EmptyEmailError if body.strip.blank?
-
-      body.force_encoding(encoding).encode("UTF-8")
-    end
-
-    private
-
-    def reply_key
-      reply_key = nil
-      message.to.each do |address|
-        reply_key = Gitlab::ReplyByEmail.reply_key_from_address(address)
-        break if reply_key
-      end
-
-      reply_key
-    end
-
-    def sent_notification
-      return nil unless reply_key
-      
-      SentNotification.for(reply_key)
-    end
-
-    def select_body(message)
-      html = nil
-      text = nil
-
-      if message.multipart?
-        html = fix_charset(message.html_part)
-        text = fix_charset(message.text_part)
-      elsif message.content_type =~ /text\/html/
-        html = fix_charset(message)
-      end
-
-      # prefer plain text
-      return text if text
-
-      if html
-        body = EmailHtmlCleaner.new(html).output_html
-      else
-        body = fix_charset(message)
-      end
-
-      # Certain trigger phrases that means we didn't parse correctly
-      if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
-        raise EmptyEmailError
-      end
-
-      body
-    end
-
-    # Force encoding to UTF-8 on a Mail::Message or Mail::Part
-    def fix_charset(object)
-      return nil if object.nil?
-
-      if object.charset
-        object.body.decoded.force_encoding(object.charset.gsub(/utf8/i, "UTF-8")).encode("UTF-8").to_s
-      else
-        object.body.to_s
-      end
-    rescue
-      nil
-    end
-
-    REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date)
-    REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })
-
-    def discourse_email_trimmer(body)
-      lines = body.scrub.lines.to_a
-      range_end = 0
-
-      lines.each_with_index do |l, idx|
-        # This one might be controversial but so many reply lines have years, times and end with a colon.
-       # Let's try it and see how well it works.
-       break if (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) ||
-                 (l =~ /On \w+ \d+,? \d+,?.*wrote:/)
-
-        # Headers on subsequent lines
-        break if (0..2).all? { |off| lines[idx+off] =~ REPLYING_HEADER_REGEX }
-        # Headers on the same line
-        break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3
-
-        range_end = idx
-      end
-
-      lines[0..range_end].join.strip
-    end
-
-    def upload_attachments
-      attachments = []
-
-      message.attachments.each do |attachment|
-        tmp = Tempfile.new("gitlab-email-attachment")
-        begin
-          File.open(tmp.path, "w+b") { |f| f.write attachment.body.decoded }
-
-          file = {
-            tempfile:     tmp,
-            filename:     attachment.filename,
-            content_type: attachment.content_type
-          }
-
-          link = ::Projects::UploadService.new(sent_notification.project, file).execute
-          if link
-            text = "[#{link[:alt]}](#{link[:url]})"
-            text.prepend("!") if link[:is_image]
-
-            attachments << text
-          end
-        ensure
-          tmp.close!
-        end
-      end
-
-      attachments
-    end
-  end
-end
author	Douwe Maan <douwe@gitlab.com>	2015-08-20 11:05:06 -0700
committer	Douwe Maan <douwe@gitlab.com>	2015-08-20 11:05:06 -0700
commit	e9972efc2f3d730e989907585dd1438c517a0bba (patch)
tree	7a38f9638cc50813d16d55f9276db98dd7cb041c /lib
parent	3ff9d5c64cef8bf8daed5e253e388545987fb945 (diff)
download	gitlab-ce-e9972efc2f3d730e989907585dd1438c517a0bba.tar.gz