summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2018-03-05 15:39:36 +0000
committerDouwe Maan <douwe@gitlab.com>2018-03-05 15:39:36 +0000
commit089b2fb9fc4aace5dca759eec87c980c780192d5 (patch)
tree5659343c54fced2149aa252b75c62b82e6f16641
parentfa1debefe7feb7980863aaa383d5a02e488b9a94 (diff)
parentcb55bc3c0770adf7122d7cf49b12cb45c43de7ec (diff)
downloadgitlab-ce-089b2fb9fc4aace5dca759eec87c980c780192d5.tar.gz
Merge branch '41719-mr-title-fix' into 'master'
Render htmlentities correctly for links not supported by Rinku Closes #41719 See merge request gitlab-org/gitlab-ce!17180
-rw-r--r--changelogs/unreleased/41719-mr-title-fix.yml5
-rw-r--r--lib/banzai/filter/autolink_filter.rb86
-rw-r--r--lib/gitlab/string_range_marker.rb2
-rw-r--r--lib/gitlab/string_regex_marker.rb12
-rw-r--r--spec/lib/banzai/filter/autolink_filter_spec.rb126
-rw-r--r--spec/lib/gitlab/string_regex_marker_spec.rb35
6 files changed, 175 insertions, 91 deletions
diff --git a/changelogs/unreleased/41719-mr-title-fix.yml b/changelogs/unreleased/41719-mr-title-fix.yml
new file mode 100644
index 00000000000..92388f30cb2
--- /dev/null
+++ b/changelogs/unreleased/41719-mr-title-fix.yml
@@ -0,0 +1,5 @@
+---
+title: Render htmlentities correctly for links not supported by Rinku
+merge_request:
+author:
+type: fixed
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb
index b8d2673c1a6..75b64ae9af2 100644
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -25,8 +25,8 @@ module Banzai
# period or comma for punctuation without those characters being included
# in the generated link.
#
- # Rubular: http://rubular.com/r/cxjPyZc7Sb
- LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)}
+ # Rubular: http://rubular.com/r/JzPhi6DCZp
+ LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)}
# Text matching LINK_PATTERN inside these elements will not be linked
IGNORE_PARENTS = %w(a code kbd pre script style).to_set
@@ -35,53 +35,19 @@ module Banzai
TEXT_QUERY = %Q(descendant-or-self::text()[
not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')})
and contains(., '://')
- and not(starts-with(., 'http'))
- and not(starts-with(., 'ftp'))
]).freeze
+ PUNCTUATION_PAIRS = {
+ "'" => "'",
+ '"' => '"',
+ ')' => '(',
+ ']' => '[',
+ '}' => '{'
+ }.freeze
+
def call
return doc if context[:autolink] == false
- rinku_parse
- text_parse
- end
-
- private
-
- # Run the text through Rinku as a first pass
- #
- # This will quickly autolink http(s) and ftp links.
- #
- # `@doc` will be re-parsed with the HTML String from Rinku.
- def rinku_parse
- # Convert the options from a Hash to a String that Rinku expects
- options = tag_options(link_options)
-
- # NOTE: We don't parse email links because it will erroneously match
- # external Commit and CommitRange references.
- #
- # The final argument tells Rinku to link short URLs that don't include a
- # period (e.g., http://localhost:3000/)
- rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1)
-
- return if rinku == html
-
- # Rinku returns a String, so parse it back to a Nokogiri::XML::Document
- # for further processing.
- @doc = parse_html(rinku)
- end
-
- # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
- def contains_unsafe?(scheme)
- return false unless scheme
-
- scheme = scheme.strip.downcase
- Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
- end
-
- # Autolinks any text matching LINK_PATTERN that Rinku didn't already
- # replace
- def text_parse
doc.xpath(TEXT_QUERY).each do |node|
content = node.to_html
@@ -97,6 +63,16 @@ module Banzai
doc
end
+ private
+
+ # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
+ def contains_unsafe?(scheme)
+ return false unless scheme
+
+ scheme = scheme.strip.downcase
+ Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
+ end
+
def autolink_match(match)
# start by stripping out dangerous links
begin
@@ -112,12 +88,30 @@ module Banzai
match.gsub!(/((?:&[\w#]+;)+)\z/, '')
dropped = ($1 || '').html_safe
+ # To match the behaviour of Rinku, if the matched link ends with a
+ # closing part of a matched pair of punctuation, we remove that trailing
+ # character unless there are an equal number of closing and opening
+ # characters in the link.
+ if match.end_with?(*PUNCTUATION_PAIRS.keys)
+ close_character = match[-1]
+ close_count = match.count(close_character)
+ open_character = PUNCTUATION_PAIRS[close_character]
+ open_count = match.count(open_character)
+
+ if open_count != close_count || open_character == close_character
+ dropped += close_character
+ match = match[0..-2]
+ end
+ end
+
options = link_options.merge(href: match)
- content_tag(:a, match, options) + dropped
+ content_tag(:a, match.html_safe, options) + dropped
end
def autolink_filter(text)
- text.gsub(LINK_PATTERN) { |match| autolink_match(match) }
+ Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:|
+ autolink_match(link)
+ end
end
def link_options
diff --git a/lib/gitlab/string_range_marker.rb b/lib/gitlab/string_range_marker.rb
index f9faa134206..c6ad997a4d4 100644
--- a/lib/gitlab/string_range_marker.rb
+++ b/lib/gitlab/string_range_marker.rb
@@ -14,7 +14,7 @@ module Gitlab
end
def mark(marker_ranges)
- return rich_line unless marker_ranges
+ return rich_line unless marker_ranges&.any?
if html_escaped
rich_marker_ranges = []
diff --git a/lib/gitlab/string_regex_marker.rb b/lib/gitlab/string_regex_marker.rb
index 7ebf1c0428c..b19aa6dea35 100644
--- a/lib/gitlab/string_regex_marker.rb
+++ b/lib/gitlab/string_regex_marker.rb
@@ -1,13 +1,15 @@
module Gitlab
class StringRegexMarker < StringRangeMarker
def mark(regex, group: 0, &block)
- regex_match = raw_line.match(regex)
- return rich_line unless regex_match
+ ranges = []
- begin_index, end_index = regex_match.offset(group)
- name_range = begin_index..(end_index - 1)
+ raw_line.scan(regex) do
+ begin_index, end_index = Regexp.last_match.offset(group)
- super([name_range], &block)
+ ranges << (begin_index..(end_index - 1))
+ end
+
+ super(ranges, &block)
end
end
end
diff --git a/spec/lib/banzai/filter/autolink_filter_spec.rb b/spec/lib/banzai/filter/autolink_filter_spec.rb
index b7c2ff03125..b502daea418 100644
--- a/spec/lib/banzai/filter/autolink_filter_spec.rb
+++ b/spec/lib/banzai/filter/autolink_filter_spec.rb
@@ -4,6 +4,7 @@ describe Banzai::Filter::AutolinkFilter do
include FilterSpecHelper
let(:link) { 'http://about.gitlab.com/' }
+ let(:quotes) { ['"', "'"] }
it 'does nothing when :autolink is false' do
exp = act = link
@@ -15,17 +16,7 @@ describe Banzai::Filter::AutolinkFilter do
expect(filter(act).to_html).to eq exp
end
- context 'when the input contains no links' do
- it 'does not parse_html back the rinku returned value' do
- act = HTML::Pipeline.parse('<p>This text contains no links to autolink</p>')
-
- expect_any_instance_of(described_class).not_to receive(:parse_html)
-
- filter(act).to_html
- end
- end
-
- context 'Rinku schemes' do
+ context 'Various schemes' do
it 'autolinks http' do
doc = filter("See #{link}")
expect(doc.at_css('a').text).to eq link
@@ -56,32 +47,26 @@ describe Banzai::Filter::AutolinkFilter do
expect(doc.at_css('a')['href']).to eq link
end
- it 'accepts link_attr options' do
- doc = filter("See #{link}", link_attr: { class: 'custom' })
+ it 'autolinks multiple URLs' do
+ link1 = 'http://localhost:3000/'
+ link2 = 'http://google.com/'
- expect(doc.at_css('a')['class']).to eq 'custom'
- end
+ doc = filter("See #{link1} and #{link2}")
- described_class::IGNORE_PARENTS.each do |elem|
- it "ignores valid links contained inside '#{elem}' element" do
- exp = act = "<#{elem}>See #{link}</#{elem}>"
- expect(filter(act).to_html).to eq exp
- end
- end
+ found_links = doc.css('a')
- context 'when the input contains link' do
- it 'does parse_html back the rinku returned value' do
- act = HTML::Pipeline.parse("<p>See #{link}</p>")
+ expect(found_links.size).to eq(2)
+ expect(found_links[0].text).to eq(link1)
+ expect(found_links[0]['href']).to eq(link1)
+ expect(found_links[1].text).to eq(link2)
+ expect(found_links[1]['href']).to eq(link2)
+ end
- expect_any_instance_of(described_class).to receive(:parse_html).at_least(:once).and_call_original
+ it 'accepts link_attr options' do
+ doc = filter("See #{link}", link_attr: { class: 'custom' })
- filter(act).to_html
- end
+ expect(doc.at_css('a')['class']).to eq 'custom'
end
- end
-
- context 'other schemes' do
- let(:link) { 'foo://bar.baz/' }
it 'autolinks smb' do
link = 'smb:///Volumes/shared/foo.pdf'
@@ -91,6 +76,21 @@ describe Banzai::Filter::AutolinkFilter do
expect(doc.at_css('a')['href']).to eq link
end
+ it 'autolinks multiple occurences of smb' do
+ link1 = 'smb:///Volumes/shared/foo.pdf'
+ link2 = 'smb:///Volumes/shared/bar.pdf'
+
+ doc = filter("See #{link1} and #{link2}")
+
+ found_links = doc.css('a')
+
+ expect(found_links.size).to eq(2)
+ expect(found_links[0].text).to eq(link1)
+ expect(found_links[0]['href']).to eq(link1)
+ expect(found_links[1].text).to eq(link2)
+ expect(found_links[1]['href']).to eq(link2)
+ end
+
it 'autolinks irc' do
link = 'irc://irc.freenode.net/git'
doc = filter("See #{link}")
@@ -132,6 +132,45 @@ describe Banzai::Filter::AutolinkFilter do
expect(doc.at_css('a').text).to eq link
end
+ it 'includes trailing punctuation when part of a balanced pair' do
+ described_class::PUNCTUATION_PAIRS.each do |close, open|
+ next if open.in?(quotes)
+
+ balanced_link = "#{link}#{open}abc#{close}"
+ balanced_actual = filter("See #{balanced_link}...")
+ unbalanced_link = "#{link}#{close}"
+ unbalanced_actual = filter("See #{unbalanced_link}...")
+
+ expect(balanced_actual.at_css('a').text).to eq(balanced_link)
+ expect(unescape(balanced_actual.to_html)).to eq(Rinku.auto_link("See #{balanced_link}..."))
+ expect(unbalanced_actual.at_css('a').text).to eq(link)
+ expect(unescape(unbalanced_actual.to_html)).to eq(Rinku.auto_link("See #{unbalanced_link}..."))
+ end
+ end
+
+ it 'removes trailing quotes' do
+ quotes.each do |quote|
+ balanced_link = "#{link}#{quote}abc#{quote}"
+ balanced_actual = filter("See #{balanced_link}...")
+ unbalanced_link = "#{link}#{quote}"
+ unbalanced_actual = filter("See #{unbalanced_link}...")
+
+ expect(balanced_actual.at_css('a').text).to eq(balanced_link[0...-1])
+ expect(unescape(balanced_actual.to_html)).to eq(Rinku.auto_link("See #{balanced_link}..."))
+ expect(unbalanced_actual.at_css('a').text).to eq(link)
+ expect(unescape(unbalanced_actual.to_html)).to eq(Rinku.auto_link("See #{unbalanced_link}..."))
+ end
+ end
+
+ it 'removes one closing punctuation mark when the punctuation in the link is unbalanced' do
+ complicated_link = "(#{link}(a'b[c'd]))'"
+ expected_complicated_link = %Q{(<a href="#{link}(a'b[c'd]))">#{link}(a'b[c'd]))</a>'}
+ actual = unescape(filter(complicated_link).to_html)
+
+ expect(actual).to eq(Rinku.auto_link(complicated_link))
+ expect(actual).to eq(expected_complicated_link)
+ end
+
it 'does not include trailing HTML entities' do
doc = filter("See &lt;&lt;&lt;#{link}&gt;&gt;&gt;")
@@ -151,4 +190,29 @@ describe Banzai::Filter::AutolinkFilter do
end
end
end
+
+ context 'when the link is inside a tag' do
+ %w[http rdar].each do |protocol|
+ it "renders text after the link correctly for #{protocol}" do
+ doc = filter(ERB::Util.html_escape_once("<#{protocol}://link><another>"))
+
+ expect(doc.children.last.text).to include('<another>')
+ end
+ end
+ end
+
+ # Rinku does not escape these characters in HTML attributes, but content_tag
+ # does. We don't care about that difference for these specs, though.
+ def unescape(html)
+ %w([ ] { }).each do |cgi_escape|
+ html.sub!(CGI.escape(cgi_escape), cgi_escape)
+ end
+
+ quotes.each do |html_escape|
+ html.sub!(CGI.escape_html(html_escape), html_escape)
+ html.sub!(CGI.escape(html_escape), CGI.escape_html(html_escape))
+ end
+
+ html
+ end
end
diff --git a/spec/lib/gitlab/string_regex_marker_spec.rb b/spec/lib/gitlab/string_regex_marker_spec.rb
index d715f9bd641..37b1298b962 100644
--- a/spec/lib/gitlab/string_regex_marker_spec.rb
+++ b/spec/lib/gitlab/string_regex_marker_spec.rb
@@ -2,17 +2,36 @@ require 'spec_helper'
describe Gitlab::StringRegexMarker do
describe '#mark' do
- let(:raw) { %{"name": "AFNetworking"} }
- let(:rich) { %{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"AFNetworking"</span>}.html_safe }
- subject do
- described_class.new(raw, rich).mark(/"[^"]+":\s*"(?<name>[^"]+)"/, group: :name) do |text, left:, right:|
- %{<a href="#">#{text}</a>}
+ context 'with a single occurrence' do
+ let(:raw) { %{"name": "AFNetworking"} }
+ let(:rich) { %{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"AFNetworking"</span>}.html_safe }
+
+ subject do
+ described_class.new(raw, rich).mark(/"[^"]+":\s*"(?<name>[^"]+)"/, group: :name) do |text, left:, right:|
+ %{<a href="#">#{text}</a>}
+ end
+ end
+
+ it 'marks the match' do
+ expect(subject).to eq(%{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"<a href="#">AFNetworking</a>"</span>})
+ expect(subject).to be_html_safe
end
end
- it 'marks the inline diffs' do
- expect(subject).to eq(%{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"<a href="#">AFNetworking</a>"</span>})
- expect(subject).to be_html_safe
+ context 'with multiple occurrences' do
+ let(:raw) { %{a <b> <c> d} }
+ let(:rich) { %{a &lt;b&gt; &lt;c&gt; d}.html_safe }
+
+ subject do
+ described_class.new(raw, rich).mark(/<[a-z]>/) do |text, left:, right:|
+ %{<strong>#{text}</strong>}
+ end
+ end
+
+ it 'marks the matches' do
+ expect(subject).to eq(%{a <strong>&lt;b&gt;</strong> <strong>&lt;c&gt;</strong> d})
+ expect(subject).to be_html_safe
+ end
end
end
end