summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Brett Walker <bwalker@gitlab.com> 2018-08-21 08:20:30 +0000
committer Sean McGivern <sean@mcgivern.me.uk> 2018-08-21 08:20:30 +0000
commit 198f8a2c3d3d173fe59e948e6bab49440a658efa (patch)
tree 90b94e8dc3a28d2716f08a2bc1375d24c6027803
parent 5dabe6d1f309530a26a11354b994bdbc988b852f (diff)
download gitlab-ce-198f8a2c3d3d173fe59e948e6bab49440a658efa.tar.gz
Resolve "Wiki: links with spaces in the url render incorrectly with CommonMark"
-rw-r--r-- changelogs/unreleased/48869-wiki-slugs-with-spaces.yml 5
-rw-r--r-- lib/banzai/filter/spaced_link_filter.rb 77
-rw-r--r-- lib/banzai/pipeline/wiki_pipeline.rb 1
-rw-r--r-- spec/lib/banzai/filter/spaced_link_filter_spec.rb 66
4 files changed, 149 insertions, 0 deletions
diff --git a/changelogs/unreleased/48869-wiki-slugs-with-spaces.yml b/changelogs/unreleased/48869-wiki-slugs-with-spaces.yml
new file mode 100644
index 00000000000..88ba8028e2c
--- /dev/null
+++ b/changelogs/unreleased/48869-wiki-slugs-with-spaces.yml
@@ -0,0 +1,5 @@
+---
+title: Allow spaces in wiki markdown links when using CommonMark
+merge_request: 20417
+author:
+type: fixed
diff --git a/lib/banzai/filter/spaced_link_filter.rb b/lib/banzai/filter/spaced_link_filter.rb
new file mode 100644
index 00000000000..574a8a6c7a5
--- /dev/null
+++ b/lib/banzai/filter/spaced_link_filter.rb
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+require 'uri'
+
+module Banzai
+ module Filter
+ # HTML Filter for markdown links with spaces in the URLs
+ #
+ # Based on Banzai::Filter::AutolinkFilter
+ #
+ # CommonMark does not allow spaces in the url portion of a link.
+ # For example, `[example](page slug)` is not valid. However,
+ # in our wikis, we support (via RedCarpet) this type of link, allowing
+ # wiki pages to be easily linked by their title. This filter adds that functionality.
+ # The intent is for this to only be used in Wikis - in general, we want
+ # to adhere to CommonMark's spec.
+ #
+ class SpacedLinkFilter < HTML::Pipeline::Filter
+ include ActionView::Helpers::TagHelper
+
+ # Pattern to match a standard markdown link
+ #
+ # Rubular: http://rubular.com/r/z9EAHxYmKI
+ LINK_PATTERN = /\[([^\]]+)\]\(([^)"]+)(?: \"([^\"]+)\")?\)/
+
+ # Text matching LINK_PATTERN inside these elements will not be linked
+ IGNORE_PARENTS = %w(a code kbd pre script style).to_set
+
+ # The XPath query to use for finding text nodes to parse.
+ TEXT_QUERY = %Q(descendant-or-self::text()[
+ not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')})
+ and contains(., ']\(')
+ ]).freeze
+
+ def call
+ return doc if context[:markdown_engine] == :redcarpet
+
+ doc.xpath(TEXT_QUERY).each do |node|
+ content = node.to_html
+
+ next unless content.match(LINK_PATTERN)
+
+ html = spaced_link_filter(content)
+
+ next if html == content
+
+ node.replace(html)
+ end
+
+ doc
+ end
+
+ private
+
+ def spaced_link_match(link)
+ match = LINK_PATTERN.match(link)
+ return link unless match && match[1] && match[2]
+
+ # escape the spaces in the url so that it's a valid markdown link,
+ # then run it through the markdown processor again, let it do its magic
+ text = match[1]
+ new_link = match[2].gsub(' ', '%20')
+ title = match[3] ? " \"#{match[3]}\"" : ''
+ html = Banzai::Filter::MarkdownFilter.call("[#{text}](#{new_link}#{title})", context)
+
+ # link is wrapped in a <p>, so strip that off
+ html.sub('<p>', '').chomp('</p>')
+ end
+
+ def spaced_link_filter(text)
+ Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:|
+ spaced_link_match(link)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/banzai/pipeline/wiki_pipeline.rb b/lib/banzai/pipeline/wiki_pipeline.rb
index c37b8e71cb0..737ff0cc818 100644
--- a/lib/banzai/pipeline/wiki_pipeline.rb
+++ b/lib/banzai/pipeline/wiki_pipeline.rb
@@ -5,6 +5,7 @@ module Banzai
@filters ||= begin
super.insert_after(Filter::TableOfContentsFilter, Filter::GollumTagsFilter)
.insert_before(Filter::TaskListFilter, Filter::WikiLinkFilter)
+ .insert_before(Filter::WikiLinkFilter, Filter::SpacedLinkFilter)
end
end
end
diff --git a/spec/lib/banzai/filter/spaced_link_filter_spec.rb b/spec/lib/banzai/filter/spaced_link_filter_spec.rb
new file mode 100644
index 00000000000..4463c011522
--- /dev/null
+++ b/spec/lib/banzai/filter/spaced_link_filter_spec.rb
@@ -0,0 +1,66 @@
+require 'spec_helper'
+
+describe Banzai::Filter::SpacedLinkFilter do
+ include FilterSpecHelper
+
+ let(:link) { '[example](page slug)' }
+
+ it 'converts slug with spaces to a link' do
+ doc = filter("See #{link}")
+
+ expect(doc.at_css('a').text).to eq 'example'
+ expect(doc.at_css('a')['href']).to eq 'page%20slug'
+ expect(doc.at_css('p')).to eq nil
+ end
+
+ it 'converts slug with spaces and a title to a link' do
+ link = '[example](page slug "title")'
+ doc = filter("See #{link}")
+
+ expect(doc.at_css('a').text).to eq 'example'
+ expect(doc.at_css('a')['href']).to eq 'page%20slug'
+ expect(doc.at_css('a')['title']).to eq 'title'
+ expect(doc.at_css('p')).to eq nil
+ end
+
+ it 'does nothing when markdown_engine is redcarpet' do
+ exp = act = link
+ expect(filter(act, markdown_engine: :redcarpet).to_html).to eq exp
+ end
+
+ it 'does nothing with empty text' do
+ link = '[](page slug)'
+ doc = filter("See #{link}")
+
+ expect(doc.at_css('a')).to eq nil
+ end
+
+ it 'does nothing with an empty slug' do
+ link = '[example]()'
+ doc = filter("See #{link}")
+
+ expect(doc.at_css('a')).to eq nil
+ end
+
+ it 'converts multiple URLs' do
+ link1 = '[first](slug one)'
+ link2 = '[second](http://example.com/slug two)'
+ doc = filter("See #{link1} and #{link2}")
+
+ found_links = doc.css('a')
+
+ expect(found_links.size).to eq(2)
+ expect(found_links[0].text).to eq 'first'
+ expect(found_links[0]['href']).to eq 'slug%20one'
+ expect(found_links[1].text).to eq 'second'
+ expect(found_links[1]['href']).to eq 'http://example.com/slug%20two'
+ end
+
+ described_class::IGNORE_PARENTS.each do |elem|
+ it "ignores valid links contained inside '#{elem}' element" do
+ exp = act = "<#{elem}>See #{link}</#{elem}>"
+
+ expect(filter(act).to_html).to eq exp
+ end
+ end
+end