Render HTML entities correctly for links not supported by Rinku

author: Jarka Kadlecová <jarka@gitlab.com> 2018-02-16 14:33:50 +0100
committer: Jarka Kadlecová <jarka@gitlab.com> 2018-02-21 19:37:08 +0100
commit: 1a09d5cda8e9f6b90b85351a16fcddea351b869f (patch)
tree: 8f122084c92e11a76d7035fe4d5f635be363dd26 /lib
parent: 0ef19f1cfa6163a17e745c36ed6d3f3c51942661 (diff)
download: gitlab-ce-1a09d5cda8e9f6b90b85351a16fcddea351b869f.tar.gz
3 files changed, 13 insertions, 37 deletions
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb
index b8d2673c1a6..c4990637971 100644
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -26,7 +26,7 @@ module Banzai
       # in the generated link.
       #
       # Rubular: http://rubular.com/r/cxjPyZc7Sb
-      LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)}
+      LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)}
 
       # Text matching LINK_PATTERN inside these elements will not be linked
       IGNORE_PARENTS = %w(a code kbd pre script style).to_set
@@ -35,42 +35,16 @@ module Banzai
       TEXT_QUERY = %Q(descendant-or-self::text()[
         not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')})
         and contains(., '://')
-        and not(starts-with(., 'http'))
-        and not(starts-with(., 'ftp'))
       ]).freeze
 
       def call
         return doc if context[:autolink] == false
 
-        rinku_parse
         text_parse
       end
 
       private
 
-      # Run the text through Rinku as a first pass
-      #
-      # This will quickly autolink http(s) and ftp links.
-      #
-      # `@doc` will be re-parsed with the HTML String from Rinku.
-      def rinku_parse
-        # Convert the options from a Hash to a String that Rinku expects
-        options = tag_options(link_options)
-
-        # NOTE: We don't parse email links because it will erroneously match
-        # external Commit and CommitRange references.
-        #
-        # The final argument tells Rinku to link short URLs that don't include a
-        # period (e.g., http://localhost:3000/)
-        rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1)
-
-        return if rinku == html
-
-        # Rinku returns a String, so parse it back to a Nokogiri::XML::Document
-        # for further processing.
-        @doc = parse_html(rinku)
-      end
-
       # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
       def contains_unsafe?(scheme)
         return false unless scheme
@@ -79,8 +53,6 @@ module Banzai
         Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
       end
 
-      # Autolinks any text matching LINK_PATTERN that Rinku didn't already
-      # replace
       def text_parse
         doc.xpath(TEXT_QUERY).each do |node|
           content = node.to_html
@@ -113,11 +85,13 @@ module Banzai
         dropped = ($1 || '').html_safe
 
         options = link_options.merge(href: match)
-        content_tag(:a, match, options) + dropped
+        content_tag(:a, match.html_safe, options) + dropped
       end
 
       def autolink_filter(text)
-        text.gsub(LINK_PATTERN) { |match| autolink_match(match) }
+        Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:|
+          autolink_match(link)
+        end
       end
 
       def link_options
diff --git a/lib/gitlab/string_range_marker.rb b/lib/gitlab/string_range_marker.rb
index f9faa134206..c6ad997a4d4 100644
--- a/lib/gitlab/string_range_marker.rb
+++ b/lib/gitlab/string_range_marker.rb
@@ -14,7 +14,7 @@ module Gitlab
     end
 
     def mark(marker_ranges)
-      return rich_line unless marker_ranges
+      return rich_line unless marker_ranges&.any?
 
       if html_escaped
         rich_marker_ranges = []
diff --git a/lib/gitlab/string_regex_marker.rb b/lib/gitlab/string_regex_marker.rb
index 7ebf1c0428c..b19aa6dea35 100644
--- a/lib/gitlab/string_regex_marker.rb
+++ b/lib/gitlab/string_regex_marker.rb
@@ -1,13 +1,15 @@
 module Gitlab
   class StringRegexMarker < StringRangeMarker
     def mark(regex, group: 0, &block)
-      regex_match = raw_line.match(regex)
-      return rich_line unless regex_match
+      ranges = []
 
-      begin_index, end_index = regex_match.offset(group)
-      name_range = begin_index..(end_index - 1)
+      raw_line.scan(regex) do
+        begin_index, end_index = Regexp.last_match.offset(group)
 
-      super([name_range], &block)
+        ranges << (begin_index..(end_index - 1))
+      end
+
+      super(ranges, &block)
     end
   end
 end
author	Jarka Kadlecová <jarka@gitlab.com>	2018-02-16 14:33:50 +0100
committer	Jarka Kadlecová <jarka@gitlab.com>	2018-02-21 19:37:08 +0100
commit	1a09d5cda8e9f6b90b85351a16fcddea351b869f (patch)
tree	8f122084c92e11a76d7035fe4d5f635be363dd26 /lib
parent	0ef19f1cfa6163a17e745c36ed6d3f3c51942661 (diff)
download	gitlab-ce-1a09d5cda8e9f6b90b85351a16fcddea351b869f.tar.gz