diff options
author | Jarka Kadlecová <jarka@gitlab.com> | 2018-02-16 14:33:50 +0100 |
---|---|---|
committer | Jarka Kadlecová <jarka@gitlab.com> | 2018-02-21 19:37:08 +0100 |
commit | 1a09d5cda8e9f6b90b85351a16fcddea351b869f (patch) | |
tree | 8f122084c92e11a76d7035fe4d5f635be363dd26 /lib | |
parent | 0ef19f1cfa6163a17e745c36ed6d3f3c51942661 (diff) | |
download | gitlab-ce-1a09d5cda8e9f6b90b85351a16fcddea351b869f.tar.gz |
Render htmlentities correctly for links not supported by Rinku
Diffstat (limited to 'lib')
-rw-r--r-- | lib/banzai/filter/autolink_filter.rb | 36 | ||||
-rw-r--r-- | lib/gitlab/string_range_marker.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/string_regex_marker.rb | 12 |
3 files changed, 13 insertions, 37 deletions
```diff
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb
index b8d2673c1a6..c4990637971 100644
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -26,7 +26,7 @@ module Banzai
       # in the generated link.
       #
       # Rubular: http://rubular.com/r/cxjPyZc7Sb
-      LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)}
+      LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)}
 
       # Text matching LINK_PATTERN inside these elements will not be linked
       IGNORE_PARENTS = %w(a code kbd pre script style).to_set
@@ -35,42 +35,16 @@ module Banzai
       TEXT_QUERY = %Q(descendant-or-self::text()[
         not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')})
         and contains(., '://')
-        and not(starts-with(., 'http'))
-        and not(starts-with(., 'ftp'))
       ]).freeze
 
       def call
         return doc if context[:autolink] == false
 
-        rinku_parse
         text_parse
       end
 
       private
 
-      # Run the text through Rinku as a first pass
-      #
-      # This will quickly autolink http(s) and ftp links.
-      #
-      # `@doc` will be re-parsed with the HTML String from Rinku.
-      def rinku_parse
-        # Convert the options from a Hash to a String that Rinku expects
-        options = tag_options(link_options)
-
-        # NOTE: We don't parse email links because it will erroneously match
-        # external Commit and CommitRange references.
-        #
-        # The final argument tells Rinku to link short URLs that don't include a
-        # period (e.g., http://localhost:3000/)
-        rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1)
-
-        return if rinku == html
-
-        # Rinku returns a String, so parse it back to a Nokogiri::XML::Document
-        # for further processing.
-        @doc = parse_html(rinku)
-      end
-
       # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
       def contains_unsafe?(scheme)
         return false unless scheme
@@ -79,8 +53,6 @@ module Banzai
         Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
       end
 
-      # Autolinks any text matching LINK_PATTERN that Rinku didn't already
-      # replace
       def text_parse
         doc.xpath(TEXT_QUERY).each do |node|
           content = node.to_html
@@ -113,11 +85,13 @@ module Banzai
         dropped = ($1 || '').html_safe
 
         options = link_options.merge(href: match)
-        content_tag(:a, match, options) + dropped
+        content_tag(:a, match.html_safe, options) + dropped
       end
 
       def autolink_filter(text)
-        text.gsub(LINK_PATTERN) { |match| autolink_match(match) }
+        Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:|
+          autolink_match(link)
+        end
       end
 
       def link_options
diff --git a/lib/gitlab/string_range_marker.rb b/lib/gitlab/string_range_marker.rb
index f9faa134206..c6ad997a4d4 100644
--- a/lib/gitlab/string_range_marker.rb
+++ b/lib/gitlab/string_range_marker.rb
@@ -14,7 +14,7 @@ module Gitlab
     end
 
     def mark(marker_ranges)
-      return rich_line unless marker_ranges
+      return rich_line unless marker_ranges&.any?
 
       if html_escaped
         rich_marker_ranges = []
diff --git a/lib/gitlab/string_regex_marker.rb b/lib/gitlab/string_regex_marker.rb
index 7ebf1c0428c..b19aa6dea35 100644
--- a/lib/gitlab/string_regex_marker.rb
+++ b/lib/gitlab/string_regex_marker.rb
@@ -1,13 +1,15 @@
 module Gitlab
   class StringRegexMarker < StringRangeMarker
     def mark(regex, group: 0, &block)
-      regex_match = raw_line.match(regex)
-      return rich_line unless regex_match
+      ranges = []
 
-      begin_index, end_index = regex_match.offset(group)
-      name_range = begin_index..(end_index - 1)
+      raw_line.scan(regex) do
+        begin_index, end_index = Regexp.last_match.offset(group)
 
-      super([name_range], &block)
+        ranges << (begin_index..(end_index - 1))
+      end
+
+      super(ranges, &block)
     end
   end
 end
```