summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2015-08-18 17:02:26 -0700
committerDouwe Maan <douwe@gitlab.com>2015-08-18 17:02:26 -0700
commit8906cabae7a6be44cafcedcaf27352614fcc462b (patch)
tree6ff7220890a5d8e22d16af0daa9689eddd0d4767
parenta428838dc00dd95f5dad0e433ad2eb73df2eff0f (diff)
downloadgitlab-ce-8906cabae7a6be44cafcedcaf27352614fcc462b.tar.gz
Changes and stuff.
-rw-r--r--Gemfile2
-rw-r--r--Gemfile.lock2
-rw-r--r--app/mailers/notify.rb10
-rw-r--r--app/models/sent_notification.rb8
-rw-r--r--app/workers/email_receiver_worker.rb2
-rw-r--r--config/initializers/1_settings.rb2
-rw-r--r--lib/gitlab/email_html_cleaner.rb133
-rw-r--r--lib/gitlab/email_receiver.rb144
-rw-r--r--lib/gitlab/reply_by_email.rb47
9 files changed, 320 insertions, 30 deletions
diff --git a/Gemfile b/Gemfile
index e3f76671f5b..9879141f5cb 100644
--- a/Gemfile
+++ b/Gemfile
@@ -274,3 +274,5 @@ gem "newrelic_rpm"
gem 'octokit', '3.7.0'
gem "mail_room", github: "DouweM/mail_room", branch: "sidekiq"
+
+gem 'email_reply_parser'
diff --git a/Gemfile.lock b/Gemfile.lock
index 597eb4cde05..629d128b424 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -163,6 +163,7 @@ GEM
dotenv (0.9.0)
dropzonejs-rails (0.7.1)
rails (> 3.1)
+ email_reply_parser (0.5.8)
email_spec (1.6.0)
launchy (~> 2.1)
mail (~> 2.2)
@@ -780,6 +781,7 @@ DEPENDENCIES
diffy (~> 3.0.3)
doorkeeper (= 2.1.3)
dropzonejs-rails
+ email_reply_parser
email_spec (~> 1.6.0)
enumerize
factory_girl_rails
diff --git a/app/mailers/notify.rb b/app/mailers/notify.rb
index 44df3d6407d..c2ea99d9688 100644
--- a/app/mailers/notify.rb
+++ b/app/mailers/notify.rb
@@ -146,7 +146,7 @@ class Notify < ActionMailer::Base
if reply_key
headers['X-GitLab-Reply-Key'] = reply_key
- headers['Reply-To'] = Gitlab.config.reply_by_email.address.gsub('%{reply_key}', reply_key)
+ headers['Reply-To'] = Gitlab::ReplyByEmail.reply_address(reply_key)
end
mail(headers)
@@ -165,6 +165,10 @@ class Notify < ActionMailer::Base
headers['In-Reply-To'] = message_id(model)
headers['References'] = message_id(model)
+ if headers[:subject]
+ headers[:subject].prepend('Re: ')
+ end
+
mail_new_thread(model, headers)
end
@@ -173,8 +177,6 @@ class Notify < ActionMailer::Base
end
def reply_key
- return nil unless Gitlab.config.reply_by_email.enabled
-
- @reply_key ||= SecureRandom.hex(16)
+ @reply_key ||= Gitlab::ReplyByEmail.reply_key
end
end
diff --git a/app/models/sent_notification.rb b/app/models/sent_notification.rb
index a3d24669b52..23a1b19ea7c 100644
--- a/app/models/sent_notification.rb
+++ b/app/models/sent_notification.rb
@@ -6,8 +6,8 @@ class SentNotification < ActiveRecord::Base
validate :project, :recipient, :reply_key, presence: true
validate :reply_key, uniqueness: true
- validates :noteable_id, presence: true, if: ->(n) { n.noteable_type.present? && n.noteable_type != 'Commit' }
- validates :commit_id, presence: true, if: ->(n) { n.noteable_type == 'Commit' }
+ validates :noteable_id, presence: true, unless: :for_commit?
+ validates :commit_id, presence: true, if: :for_commit?
def self.for(reply_key)
find_by(reply_key: reply_key)
@@ -19,11 +19,9 @@ class SentNotification < ActiveRecord::Base
def noteable
if for_commit?
- project.commit(commit_id)
+ project.commit(commit_id) rescue nil
else
super
end
- rescue
- nil
end
end
diff --git a/app/workers/email_receiver_worker.rb b/app/workers/email_receiver_worker.rb
index e44a430f6bc..94e346b5a51 100644
--- a/app/workers/email_receiver_worker.rb
+++ b/app/workers/email_receiver_worker.rb
@@ -4,7 +4,7 @@ class EmailReceiverWorker
sidekiq_options queue: :incoming_email
def perform(raw)
- return unless Gitlab.config.reply_by_email.enabled
+ return unless Gitlab::ReplyByEmail.enabled?
# begin
Gitlab::EmailReceiver.new(raw).process
diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb
index 9e83660e103..654de6238d0 100644
--- a/config/initializers/1_settings.rb
+++ b/config/initializers/1_settings.rb
@@ -153,7 +153,7 @@ Settings.gitlab['restricted_signup_domains'] ||= []
# Reply by email
#
Settings['reply_by_email'] ||= Settingslogic.new({})
-Settings.reply_by_email['enabled'] = false if Settings.gravatar['enabled'].nil?
+Settings.reply_by_email['enabled'] = false if Settings.reply_by_email['enabled'].nil?
#
# Gravatar
diff --git a/lib/gitlab/email_html_cleaner.rb b/lib/gitlab/email_html_cleaner.rb
new file mode 100644
index 00000000000..6d7a17fe87c
--- /dev/null
+++ b/lib/gitlab/email_html_cleaner.rb
@@ -0,0 +1,133 @@
+# Taken mostly from Discourse's Email::HtmlCleaner
+module Gitlab
+ # HtmlCleaner cleans up the extremely dirty HTML that many email clients
+ # generate by stripping out any excess divs or spans, removing styling in
+ # the process (which also makes the html more suitable to be parsed as
+ # Markdown).
+ class EmailHtmlCleaner
+ # Elements to hoist all children out of
+ HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
+ # Node types to always delete
+ HTML_DELETE_ELEMENT_TYPES = [
+ Nokogiri::XML::Node::DTD_NODE,
+ Nokogiri::XML::Node::COMMENT_NODE,
+ ]
+
+ # Private variables:
+ # @doc - nokogiri document
+ # @out - same as @doc, but only if trimming has occured
+ def initialize(html)
+ if html.is_a?(String)
+ @doc = Nokogiri::HTML(html)
+ else
+ @doc = html
+ end
+ end
+
+ class << self
+ # EmailHtmlCleaner.trim(inp, opts={})
+ #
+ # Arguments:
+ # inp - Either a HTML string or a Nokogiri document.
+ # Options:
+ # :return => :doc, :string
+ # Specify the desired return type.
+ # Defaults to the type of the input.
+ # A value of :string is equivalent to calling get_document_text()
+ # on the returned document.
+ def trim(inp, opts={})
+ cleaner = EmailHtmlCleaner.new(inp)
+
+ opts[:return] ||= (inp.is_a?(String) ? :string : :doc)
+
+ if opts[:return] == :string
+ cleaner.output_html
+ else
+ cleaner.output_document
+ end
+ end
+
+ # EmailHtmlCleaner.get_document_text(doc)
+ #
+ # Get the body portion of the document, including html, as a string.
+ def get_document_text(doc)
+ body = doc.xpath('//body')
+ if body
+ body.inner_html
+ else
+ doc.inner_html
+ end
+ end
+ end
+
+ def output_document
+ @out ||= begin
+ doc = @doc
+ trim_process_node doc
+ add_newlines doc
+ doc
+ end
+ end
+
+ def output_html
+ EmailHtmlCleaner.get_document_text(output_document)
+ end
+
+ private
+
+ def add_newlines(doc)
+ # Replace <br> tags with a markdown \n
+ doc.xpath('//br').each do |br|
+ br.replace(new_linebreak_node doc, 2)
+ end
+ # Surround <p> tags with newlines, to help with line-wise postprocessing
+ # and ensure markdown paragraphs
+ doc.xpath('//p').each do |p|
+ p.before(new_linebreak_node doc)
+ p.after(new_linebreak_node doc, 2)
+ end
+ end
+
+ def new_linebreak_node(doc, count=1)
+ Nokogiri::XML::Text.new("\n" * count, doc)
+ end
+
+ def trim_process_node(node)
+ if should_hoist?(node)
+ hoisted = trim_hoist_element node
+ hoisted.each { |child| trim_process_node child }
+ elsif should_delete?(node)
+ node.remove
+ else
+ if children = node.children
+ children.each { |child| trim_process_node child }
+ end
+ end
+
+ node
+ end
+
+ def trim_hoist_element(element)
+ hoisted = []
+ element.children.each do |child|
+ element.before(child)
+ hoisted << child
+ end
+ element.remove
+ hoisted
+ end
+
+ def should_hoist?(node)
+ return false unless node.element?
+ HTML_HOIST_ELEMENTS.include? node.name
+ end
+
+ def should_delete?(node)
+ return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
+ return true if node.element? && node.name == 'head'
+ return true if node.text? && node.text.strip.blank?
+
+ false
+ end
+ end
+end
diff --git a/lib/gitlab/email_receiver.rb b/lib/gitlab/email_receiver.rb
index a9f67bb5da0..18a06d2ee8c 100644
--- a/lib/gitlab/email_receiver.rb
+++ b/lib/gitlab/email_receiver.rb
@@ -1,44 +1,69 @@
+# Inspired in great part by Discourse's Email::Receiver
module Gitlab
class EmailReceiver
+ class ProcessingError < StandardError; end
+ class EmailUnparsableError < ProcessingError; end
+ class EmptyEmailError < ProcessingError; end
+ class UserNotFoundError < ProcessingError; end
+ class UserNotAuthorizedLevelError < ProcessingError; end
+ class NoteableNotFoundError < ProcessingError; end
+ class AutoGeneratedEmailError < ProcessingError; end
+ class SentNotificationNotFound < ProcessingError; end
+ class InvalidNote < ProcessingError; end
+
def initialize(raw)
@raw = raw
end
def message
@message ||= Mail::Message.new(@raw)
+ rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError => e
+ raise EmailUnparsableError, e
end
def process
- return unless message && sent_notification
+ raise EmptyEmailError if @raw.blank?
+
+ raise AutoGeneratedEmailError if message.header.to_s =~ /auto-(generated|replied)/
+
+ raise SentNotificationNotFound unless sent_notification
+
+ author = sent_notification.recipient
+
+ raise UserNotFoundError unless author
+
+ project = sent_notification.project
+
+ raise UserNotAuthorizedLevelError unless author.can?(:create_note, project)
+
+ raise NoteableNotFoundError unless sent_notification.noteable
+
+ body = parse_body(message)
- Notes::CreateService.new(
- sent_notification.project,
- sent_notification.recipient,
- note: message.text_part.to_s,
+ note = Notes::CreateService.new(
+ project,
+ author,
+ note: body,
noteable_type: sent_notification.noteable_type,
noteable_id: sent_notification.noteable_id,
commit_id: sent_notification.commit_id
).execute
+
+ unless note.persisted?
+ raise InvalidNote, note.errors.full_messages.join("\n")
+ end
end
private
def reply_key
- address = Gitlab.config.reply_by_email.address
- return nil unless address
-
- regex = Regexp.escape(address)
- regex = regex.gsub(Regexp.escape('%{reply_key}'), "(.*)")
- regex = Regexp.new(regex)
-
- address = message.to.find { |address| address =~ regex }
- return nil unless address
+ reply_key = nil
+ message.to.each do |address|
+ reply_key = Gitlab::ReplyByEmail.reply_key_from_address(address)
+ break if reply_key
+ end
- match = address.match(regex)
-
- return nil unless match && match[1].present?
-
- match[1]
+ reply_key
end
def sent_notification
@@ -46,5 +71,86 @@ module Gitlab
SentNotification.for(reply_key)
end
+
+ def parse_body(message)
+ body = select_body(message)
+
+ encoding = body.encoding
+ raise EmptyEmailError if body.strip.blank?
+
+ body = discourse_email_trimmer(body)
+ raise EmptyEmailError if body.strip.blank?
+
+ body = EmailReplyParser.parse_reply(body)
+ raise EmptyEmailError if body.strip.blank?
+
+ body.force_encoding(encoding).encode("UTF-8")
+ end
+
+ def select_body(message)
+ html = nil
+ text = nil
+
+ if message.multipart?
+ html = fix_charset(message.html_part)
+ text = fix_charset(message.text_part)
+ elsif message.content_type =~ /text\/html/
+ html = fix_charset(message)
+ end
+
+ # prefer plain text
+ return text if text
+
+ if html
+ body = EmailHtmlCleaner.new(html).output_html
+ else
+ body = fix_charset(message)
+ end
+
+ # Certain trigger phrases that means we didn't parse correctly
+ if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
+ raise EmptyEmailError
+ end
+
+ body
+ end
+
+ # Force encoding to UTF-8 on a Mail::Message or Mail::Part
+ def fix_charset(object)
+ return nil if object.nil?
+
+ if object.charset
+ object.body.decoded.force_encoding(object.charset.gsub(/utf8/i, "UTF-8")).encode("UTF-8").to_s
+ else
+ object.body.to_s
+ end
+ rescue
+ nil
+ end
+
+ REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date)
+ REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })
+
+ def discourse_email_trimmer(body)
+ lines = body.scrub.lines.to_a
+ range_end = 0
+
+ lines.each_with_index do |l, idx|
+ break if l =~ /\A\s*\-{3,80}\s*\z/ ||
+ # This one might be controversial but so many reply lines have years, times and end with a colon.
+ # Let's try it and see how well it works.
+ (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) ||
+ (l =~ /On \w+ \d+,? \d+,?.*wrote:/)
+
+ # Headers on subsequent lines
+ break if (0..2).all? { |off| lines[idx+off] =~ REPLYING_HEADER_REGEX }
+ # Headers on the same line
+ break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3
+
+ range_end = idx
+ end
+
+ lines[0..range_end].join.strip
+ end
end
end
diff --git a/lib/gitlab/reply_by_email.rb b/lib/gitlab/reply_by_email.rb
new file mode 100644
index 00000000000..b6157de3610
--- /dev/null
+++ b/lib/gitlab/reply_by_email.rb
@@ -0,0 +1,47 @@
+module Gitlab
+ module ReplyByEmail
+ class << self
+ def enabled?
+ config.enabled &&
+ config.address &&
+ config.address.include?("%{reply_key}")
+ end
+
+ def reply_key
+ return nil unless enabled?
+
+ SecureRandom.hex(16)
+ end
+
+ def reply_address(reply_key)
+ config.address.gsub('%{reply_key}', reply_key)
+ end
+
+ def reply_key_from_address(address)
+ return unless address_regex
+
+ match = address.match(address_regex)
+ return unless match
+
+ match[1]
+ end
+
+ private
+
+ def config
+ Gitlab.config.reply_by_email
+ end
+
+ def address_regex
+ @address_regex ||= begin
+ wildcard_address = config.address
+ return nil unless wildcard_address
+
+ regex = Regexp.escape(wildcard_address)
+ regex = regex.gsub(Regexp.escape('%{reply_key}'), "(.+)")
+ Regexp.new(regex).freeze
+ end
+ end
+ end
+ end
+end