summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2016-12-27 14:46:47 +0000
committerDouwe Maan <douwe@gitlab.com>2016-12-27 14:46:47 +0000
commitd6a37a8cef7318793b16897e0f56f71f58c4679f (patch)
treeb0d3ac0051ad58a957086461c844468cfeaaa21e
parent47b35ddecd6350c422ffc20d762ed6af232c577f (diff)
parent7218daaa96badbaadf6e94b6729a74a15273935d (diff)
downloadgitlab-ce-d6a37a8cef7318793b16897e0f56f71f58c4679f.tar.gz
Merge branch 'bugfix/outlook-language-parsing' into 'master'
Switch to Discourse's EmailReplyTrimmer for more robust reply parsing See merge request !7473
-rw-r--r--Gemfile2
-rw-r--r--Gemfile.lock4
-rw-r--r--lib/gitlab/email/reply_parser.rb36
-rw-r--r--spec/lib/gitlab/email/reply_parser_spec.rb2
4 files changed, 13 insertions, 31 deletions
diff --git a/Gemfile b/Gemfile
index 9dfaf7a48a2..fff808e9805 100644
--- a/Gemfile
+++ b/Gemfile
@@ -332,7 +332,7 @@ gem 'octokit', '~> 4.3.0'
gem 'mail_room', '~> 0.9.0'
-gem 'email_reply_parser', '~> 0.5.8'
+gem 'email_reply_trimmer', '~> 0.1'
gem 'html2text'
gem 'ruby-prof', '~> 0.16.2'
diff --git a/Gemfile.lock b/Gemfile.lock
index 9f8367b420a..765d57c6238 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -167,7 +167,7 @@ GEM
railties (>= 4.2)
dropzonejs-rails (0.7.2)
rails (> 3.1)
- email_reply_parser (0.5.8)
+ email_reply_trimmer (0.1.6)
email_spec (1.6.0)
launchy (~> 2.1)
mail (~> 2.2)
@@ -839,7 +839,7 @@ DEPENDENCIES
diffy (~> 3.1.0)
doorkeeper (~> 4.2.0)
dropzonejs-rails (~> 0.7.1)
- email_reply_parser (~> 0.5.8)
+ email_reply_trimmer (~> 0.1)
email_spec (~> 1.6.0)
factory_girl_rails (~> 4.7.0)
ffaker (~> 2.0.0)
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
index f586c5ab062..8c8dd1b9cef 100644
--- a/lib/gitlab/email/reply_parser.rb
+++ b/lib/gitlab/email/reply_parser.rb
@@ -13,9 +13,17 @@ module Gitlab
encoding = body.encoding
- body = discourse_email_trimmer(body)
+ body = EmailReplyTrimmer.trim(body)
- body = EmailReplyParser.parse_reply(body)
+ return '' unless body
+
+ # not using /\s+$/ here because that deletes empty lines
+ body = body.gsub(/[ \t]$/, '')
+
+ # NOTE: We currently don't support empty quotes.
+ # EmailReplyTrimmer allows this as a special case,
+ # so we detect it manually here.
+ return "" if body.lines.all? { |l| l.strip.empty? || l.start_with?('>') }
body.force_encoding(encoding).encode("UTF-8")
end
@@ -57,30 +65,6 @@ module Gitlab
rescue
nil
end
-
- REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date)
- REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })
-
- def discourse_email_trimmer(body)
- lines = body.scrub.lines.to_a
- range_end = 0
-
- lines.each_with_index do |l, idx|
- # This one might be controversial but so many reply lines have years, times and end with a colon.
- # Let's try it and see how well it works.
- break if (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) ||
- (l =~ /On \w+ \d+,? \d+,?.*wrote:/)
-
- # Headers on subsequent lines
- break if (0..2).all? { |off| lines[idx + off] =~ REPLYING_HEADER_REGEX }
- # Headers on the same line
- break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3
-
- range_end = idx
- end
-
- lines[0..range_end].join.strip
- end
end
end
end
diff --git a/spec/lib/gitlab/email/reply_parser_spec.rb b/spec/lib/gitlab/email/reply_parser_spec.rb
index c7a0139d32a..28698e89c33 100644
--- a/spec/lib/gitlab/email/reply_parser_spec.rb
+++ b/spec/lib/gitlab/email/reply_parser_spec.rb
@@ -88,8 +88,6 @@ describe Gitlab::Email::ReplyParser, lib: true do
expect(test_parse_body(fixture_file("emails/inline_reply.eml"))).
to eq(
<<-BODY.strip_heredoc.chomp
- On Wed, Oct 8, 2014 at 11:12 AM, techAPJ <info@unconfigured.discourse.org> wrote:
-
> techAPJ <https://meta.discourse.org/users/techapj>
> November 28
>