diff options
author | Sean McGivern <sean@gitlab.com> | 2018-02-22 12:09:27 +0000 |
---|---|---|
committer | Sean McGivern <sean@gitlab.com> | 2018-03-02 13:42:57 +0000 |
commit | cb55bc3c0770adf7122d7cf49b12cb45c43de7ec (patch) | |
tree | 6ec14a9b8b407ac7986b9e940254d63288266379 | |
parent | 1a09d5cda8e9f6b90b85351a16fcddea351b869f (diff) | |
download | gitlab-ce-41719-mr-title-fix.tar.gz |
Match Rinku's behaviour for closing punctuation in links41719-mr-title-fix
Rinku 2.0.0 (the version we use) will remove the last character of a link if
it's a closing part of a punctuation pair (different types of parentheses and
quotes), unless both of the below are true:
1. The matching pair has different start and end characters.
2. There are equal numbers of both in the matched string (they don't have to be
balanced).
-rw-r--r-- | lib/banzai/filter/autolink_filter.rb | 50 | ||||
-rw-r--r-- | spec/lib/banzai/filter/autolink_filter_spec.rb | 73 |
2 files changed, 92 insertions, 31 deletions
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index c4990637971..75b64ae9af2 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -25,7 +25,7 @@ module Banzai # period or comma for punctuation without those characters being included # in the generated link. # - # Rubular: http://rubular.com/r/cxjPyZc7Sb + # Rubular: http://rubular.com/r/JzPhi6DCZp LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)} # Text matching LINK_PATTERN inside these elements will not be linked @@ -37,23 +37,17 @@ module Banzai and contains(., '://') ]).freeze + PUNCTUATION_PAIRS = { + "'" => "'", + '"' => '"', + ')' => '(', + ']' => '[', + '}' => '{' + }.freeze + def call return doc if context[:autolink] == false - text_parse - end - - private - - # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme - def contains_unsafe?(scheme) - return false unless scheme - - scheme = scheme.strip.downcase - Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) } - end - - def text_parse doc.xpath(TEXT_QUERY).each do |node| content = node.to_html @@ -69,6 +63,16 @@ module Banzai doc end + private + + # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme + def contains_unsafe?(scheme) + return false unless scheme + + scheme = scheme.strip.downcase + Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) } + end + def autolink_match(match) # start by stripping out dangerous links begin @@ -84,6 +88,22 @@ module Banzai match.gsub!(/((?:&[\w#]+;)+)\z/, '') dropped = ($1 || '').html_safe + # To match the behaviour of Rinku, if the matched link ends with a + # closing part of a matched pair of punctuation, we remove that trailing + # character unless there are an equal number of closing and opening + # characters in the link. + if match.end_with?(*PUNCTUATION_PAIRS.keys) + close_character = match[-1] + close_count = match.count(close_character) + open_character = PUNCTUATION_PAIRS[close_character] + open_count = match.count(open_character) + + if open_count != close_count || open_character == close_character + dropped += close_character + match = match[0..-2] + end + end + options = link_options.merge(href: match) content_tag(:a, match.html_safe, options) + dropped end diff --git a/spec/lib/banzai/filter/autolink_filter_spec.rb b/spec/lib/banzai/filter/autolink_filter_spec.rb index 0498b99ccf3..b502daea418 100644 --- a/spec/lib/banzai/filter/autolink_filter_spec.rb +++ b/spec/lib/banzai/filter/autolink_filter_spec.rb @@ -4,6 +4,7 @@ describe Banzai::Filter::AutolinkFilter do include FilterSpecHelper let(:link) { 'http://about.gitlab.com/' } + let(:quotes) { ['"', "'"] } it 'does nothing when :autolink is false' do exp = act = link @@ -15,16 +16,6 @@ describe Banzai::Filter::AutolinkFilter do expect(filter(act).to_html).to eq exp end - context 'when the input contains no links' do - it 'does not parse_html back the rinku returned value' do - act = HTML::Pipeline.parse('<p>This text contains no links to autolink</p>') - - expect_any_instance_of(described_class).not_to receive(:parse_html) - - filter(act).to_html - end - end - context 'Various schemes' do it 'autolinks http' do doc = filter("See #{link}") @@ -141,6 +132,45 @@ describe Banzai::Filter::AutolinkFilter do expect(doc.at_css('a').text).to eq link end + it 'includes trailing punctuation when part of a balanced pair' do + described_class::PUNCTUATION_PAIRS.each do |close, open| + next if open.in?(quotes) + + balanced_link = "#{link}#{open}abc#{close}" + balanced_actual = filter("See #{balanced_link}...") + unbalanced_link = "#{link}#{close}" + unbalanced_actual = filter("See #{unbalanced_link}...") + + expect(balanced_actual.at_css('a').text).to eq(balanced_link) + expect(unescape(balanced_actual.to_html)).to eq(Rinku.auto_link("See #{balanced_link}...")) + expect(unbalanced_actual.at_css('a').text).to eq(link) + expect(unescape(unbalanced_actual.to_html)).to eq(Rinku.auto_link("See #{unbalanced_link}...")) + end + end + + it 'removes trailing quotes' do + quotes.each do |quote| + balanced_link = "#{link}#{quote}abc#{quote}" + balanced_actual = filter("See #{balanced_link}...") + unbalanced_link = "#{link}#{quote}" + unbalanced_actual = filter("See #{unbalanced_link}...") + + expect(balanced_actual.at_css('a').text).to eq(balanced_link[0...-1]) + expect(unescape(balanced_actual.to_html)).to eq(Rinku.auto_link("See #{balanced_link}...")) + expect(unbalanced_actual.at_css('a').text).to eq(link) + expect(unescape(unbalanced_actual.to_html)).to eq(Rinku.auto_link("See #{unbalanced_link}...")) + end + end + + it 'removes one closing punctuation mark when the punctuation in the link is unbalanced' do + complicated_link = "(#{link}(a'b[c'd]))'" + expected_complicated_link = %Q{(<a href="#{link}(a'b[c'd]))">#{link}(a'b[c'd]))</a>'} + actual = unescape(filter(complicated_link).to_html) + + expect(actual).to eq(Rinku.auto_link(complicated_link)) + expect(actual).to eq(expected_complicated_link) + end + it 'does not include trailing HTML entities' do doc = filter("See <<<#{link}>>>") @@ -162,16 +192,27 @@ describe Banzai::Filter::AutolinkFilter do end context 'when the link is inside a tag' do - it 'renders text after the link correctly for http' do - doc = filter(ERB::Util.html_escape_once("<http://link><another>")) + %w[http rdar].each do |protocol| + it "renders text after the link correctly for #{protocol}" do + doc = filter(ERB::Util.html_escape_once("<#{protocol}://link><another>")) - expect(doc.children.last.text).to include('<another>') + expect(doc.children.last.text).to include('<another>') + end end + end - it 'renders text after the link correctly for not other protocol' do - doc = filter(ERB::Util.html_escape_once("<rdar://link><another>")) + # Rinku does not escape these characters in HTML attributes, but content_tag + # does. We don't care about that difference for these specs, though. + def unescape(html) + %w([ ] { }).each do |cgi_escape| + html.sub!(CGI.escape(cgi_escape), cgi_escape) + end - expect(doc.children.last.text).to include('<another>') + quotes.each do |html_escape| + html.sub!(CGI.escape_html(html_escape), html_escape) + html.sub!(CGI.escape(html_escape), CGI.escape_html(html_escape)) end + + html end end |