diff options
author | Douwe Maan <douwe@gitlab.com> | 2017-06-20 17:14:37 +0000 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2017-06-20 17:14:37 +0000 |
commit | af5c7c76ff857e88ebfb74267f3ab6cb910d4155 (patch) | |
tree | 012c3abe5de91de53a321891ea0388e7b3876cf1 | |
parent | 026455cf3af1ca7fee0279f3bed645361dac4b1f (diff) | |
parent | b869a99a743f02873038aeeb07d7b5ffbf4f6d89 (diff) | |
download | gitlab-ce-af5c7c76ff857e88ebfb74267f3ab6cb910d4155.tar.gz |
Merge branch 'bugfix/html-email-brackets' into 'master'
unwrap links without an href
Closes #27645
See merge request !9045
-rw-r--r-- | changelogs/unreleased/27645-html-email-brackets-bug.yml | 4 | ||||
-rw-r--r-- | lib/gitlab/email/html_parser.rb | 7 | ||||
-rw-r--r-- | spec/fixtures/emails/html_empty_link.eml | 26 | ||||
-rw-r--r-- | spec/lib/gitlab/email/reply_parser_spec.rb | 4 |
4 files changed, 41 insertions, 0 deletions
diff --git a/changelogs/unreleased/27645-html-email-brackets-bug.yml b/changelogs/unreleased/27645-html-email-brackets-bug.yml new file mode 100644 index 00000000000..e8004d03884 --- /dev/null +++ b/changelogs/unreleased/27645-html-email-brackets-bug.yml @@ -0,0 +1,4 @@ +--- +title: Fix an email parsing bug where brackets would be inserted in emails from some Outlook clients +merge_request: 9045 +author: jneen diff --git a/lib/gitlab/email/html_parser.rb b/lib/gitlab/email/html_parser.rb index a4ca62bfc41..50559a48973 100644 --- a/lib/gitlab/email/html_parser.rb +++ b/lib/gitlab/email/html_parser.rb @@ -17,6 +17,13 @@ module Gitlab def filter_replies! document.xpath('//blockquote').each(&:remove) document.xpath('//table').each(&:remove) + + # bogus links with no href are sometimes added by outlook, + # and can result in Html2Text adding extra square brackets + # to the text, so we unwrap them here. + document.xpath('//a[not(@href)]').each do |link| + link.replace(link.children) + end end def filtered_html diff --git a/spec/fixtures/emails/html_empty_link.eml b/spec/fixtures/emails/html_empty_link.eml new file mode 100644 index 00000000000..1672b98b925 --- /dev/null +++ b/spec/fixtures/emails/html_empty_link.eml @@ -0,0 +1,26 @@ + +MIME-Version: 1.0 +Received: by 10.25.161.144 with HTTP; Tue, 7 Oct 2014 22:17:17 -0700 (PDT) +X-Originating-IP: [117.207.85.84] +In-Reply-To: <5434c8b52bb3a_623ff09fec70f049749@discourse-app.mail> +References: <topic/35@discourse.techapj.com> + <5434c8b52bb3a_623ff09fec70f049749@discourse-app.mail> +Date: Wed, 8 Oct 2014 10:47:17 +0530 +Delivered-To: arpit@techapj.com +Message-ID: <CAOJeqne=SJ_LwN4sb-0Y95ejc2OpreVhdmcPn0TnmwSvTCYzzQ@mail.gmail.com> +Subject: Re: [Discourse] [Meta] Welcome to techAPJ's Discourse! +From: Arpit Jalan <arpit@techapj.com> +To: Discourse <mail+e1c7f2a380e33840aeb654f075490bad@arpitjalan.com>Accept-Language: en-US +Content-Language: en-US +X-MS-Has-Attach: +X-MS-TNEF-Correlator: +x-originating-ip: [134.68.31.227] +Content-Type: multipart/alternative; + boundary="_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_" +MIME-Version: 1.0 + +--_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_ +Content-Type: text/html; charset="utf-8" + +<a name="_MailEndCompose">no brackets!</a> +--_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_-- diff --git a/spec/lib/gitlab/email/reply_parser_spec.rb b/spec/lib/gitlab/email/reply_parser_spec.rb index 28698e89c33..71659d5e8b0 100644 --- a/spec/lib/gitlab/email/reply_parser_spec.rb +++ b/spec/lib/gitlab/email/reply_parser_spec.rb @@ -208,5 +208,9 @@ describe Gitlab::Email::ReplyParser, lib: true do it "properly renders html-only email from MS Outlook" do expect(test_parse_body(fixture_file("emails/outlook_html.eml"))).to eq("Microsoft Outlook 2010") end + + it "does not wrap links with no href in unnecessary brackets" do + expect(test_parse_body(fixture_file("emails/html_empty_link.eml"))).to eq("no brackets!") + end end end |