summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2017-06-20 17:14:37 +0000
committerDouwe Maan <douwe@gitlab.com>2017-06-20 17:14:37 +0000
commitaf5c7c76ff857e88ebfb74267f3ab6cb910d4155 (patch)
tree012c3abe5de91de53a321891ea0388e7b3876cf1
parent026455cf3af1ca7fee0279f3bed645361dac4b1f (diff)
parentb869a99a743f02873038aeeb07d7b5ffbf4f6d89 (diff)
downloadgitlab-ce-af5c7c76ff857e88ebfb74267f3ab6cb910d4155.tar.gz
Merge branch 'bugfix/html-email-brackets' into 'master'
Unwrap links without an href. Closes #27645. See merge request !9045.
-rw-r--r--changelogs/unreleased/27645-html-email-brackets-bug.yml4
-rw-r--r--lib/gitlab/email/html_parser.rb7
-rw-r--r--spec/fixtures/emails/html_empty_link.eml26
-rw-r--r--spec/lib/gitlab/email/reply_parser_spec.rb4
4 files changed, 41 insertions, 0 deletions
diff --git a/changelogs/unreleased/27645-html-email-brackets-bug.yml b/changelogs/unreleased/27645-html-email-brackets-bug.yml
new file mode 100644
index 00000000000..e8004d03884
--- /dev/null
+++ b/changelogs/unreleased/27645-html-email-brackets-bug.yml
@@ -0,0 +1,4 @@
+---
+title: Fix an email parsing bug where brackets would be inserted in emails from some Outlook clients
+merge_request: 9045
+author: jneen
diff --git a/lib/gitlab/email/html_parser.rb b/lib/gitlab/email/html_parser.rb
index a4ca62bfc41..50559a48973 100644
--- a/lib/gitlab/email/html_parser.rb
+++ b/lib/gitlab/email/html_parser.rb
@@ -17,6 +17,13 @@ module Gitlab
def filter_replies!
document.xpath('//blockquote').each(&:remove)
document.xpath('//table').each(&:remove)
+
+ # Bogus links with no href are sometimes added by Outlook,
+ # and can result in Html2Text adding extra square brackets
+ # to the text, so we unwrap them here.
+ document.xpath('//a[not(@href)]').each do |link|
+ link.replace(link.children)
+ end
end
def filtered_html
diff --git a/spec/fixtures/emails/html_empty_link.eml b/spec/fixtures/emails/html_empty_link.eml
new file mode 100644
index 00000000000..1672b98b925
--- /dev/null
+++ b/spec/fixtures/emails/html_empty_link.eml
@@ -0,0 +1,26 @@
+
+MIME-Version: 1.0
+Received: by 10.25.161.144 with HTTP; Tue, 7 Oct 2014 22:17:17 -0700 (PDT)
+X-Originating-IP: [117.207.85.84]
+In-Reply-To: <5434c8b52bb3a_623ff09fec70f049749@discourse-app.mail>
+References: <topic/35@discourse.techapj.com>
+ <5434c8b52bb3a_623ff09fec70f049749@discourse-app.mail>
+Date: Wed, 8 Oct 2014 10:47:17 +0530
+Delivered-To: arpit@techapj.com
+Message-ID: <CAOJeqne=SJ_LwN4sb-0Y95ejc2OpreVhdmcPn0TnmwSvTCYzzQ@mail.gmail.com>
+Subject: Re: [Discourse] [Meta] Welcome to techAPJ's Discourse!
+From: Arpit Jalan <arpit@techapj.com>
+To: Discourse <mail+e1c7f2a380e33840aeb654f075490bad@arpitjalan.com>Accept-Language: en-US
+Content-Language: en-US
+X-MS-Has-Attach:
+X-MS-TNEF-Correlator:
+x-originating-ip: [134.68.31.227]
+Content-Type: multipart/alternative;
+ boundary="_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_"
+MIME-Version: 1.0
+
+--_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_
+Content-Type: text/html; charset="utf-8"
+
+<a name="_MailEndCompose">no brackets!</a>
+--_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_--
diff --git a/spec/lib/gitlab/email/reply_parser_spec.rb b/spec/lib/gitlab/email/reply_parser_spec.rb
index 28698e89c33..71659d5e8b0 100644
--- a/spec/lib/gitlab/email/reply_parser_spec.rb
+++ b/spec/lib/gitlab/email/reply_parser_spec.rb
@@ -208,5 +208,9 @@ describe Gitlab::Email::ReplyParser, lib: true do
it "properly renders html-only email from MS Outlook" do
expect(test_parse_body(fixture_file("emails/outlook_html.eml"))).to eq("Microsoft Outlook 2010")
end
+
+ it "does not wrap links with no href in unnecessary brackets" do
+ expect(test_parse_body(fixture_file("emails/html_empty_link.eml"))).to eq("no brackets!")
+ end
end
end