summary | refs | log | tree | commit | diff
path: root/lib/gitlab/url_sanitizer.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/url_sanitizer.rb')
-rw-r--r-- lib/gitlab/url_sanitizer.rb 90
1 files changed, 59 insertions, 31 deletions
diff --git a/lib/gitlab/url_sanitizer.rb b/lib/gitlab/url_sanitizer.rb
index e3bf11b00b4..79e124a58f5 100644
--- a/lib/gitlab/url_sanitizer.rb
+++ b/lib/gitlab/url_sanitizer.rb
@@ -2,15 +2,37 @@
module Gitlab
class UrlSanitizer
+ include Gitlab::Utils::StrongMemoize
+
ALLOWED_SCHEMES = %w[http https ssh git].freeze
ALLOWED_WEB_SCHEMES = %w[http https].freeze
+ SCHEMIFIED_SCHEME = 'glschemelessuri'
+ SCHEMIFY_PLACEHOLDER = "#{SCHEMIFIED_SCHEME}://".freeze
+ # URI::DEFAULT_PARSER.make_regexp will only match URLs with schemes or
+ # relative URLs. This section will match schemeless URIs with userinfo
+ # (e.g. user:pass@gitlab.com) but will not match scp-style URIs (e.g.
+ # user@server:path/to/file).
+ #
+ # The userinfo part is very loose compared to URI's implementation, so we
+ # also match non-escaped userinfo, e.g. foo:b?r@gitlab.com, which should be
+ # encoded as foo:b%3Fr@gitlab.com.
+ URI_REGEXP = %r{
+ (?:
+ #{URI::DEFAULT_PARSER.make_regexp(ALLOWED_SCHEMES)}
+ |
+ (?:(?:(?!@)[%#{URI::REGEXP::PATTERN::UNRESERVED}#{URI::REGEXP::PATTERN::RESERVED}])+(?:@))
+ (?# negative lookahead ensures this isn't an SCP-style URL: [host]:[rel_path|abs_path] server:path/to/file)
+ (?!#{URI::REGEXP::PATTERN::HOST}:(?:#{URI::REGEXP::PATTERN::REL_PATH}|#{URI::REGEXP::PATTERN::ABS_PATH}))
+ #{URI::REGEXP::PATTERN::HOSTPORT}
+ )
+ }x
def self.sanitize(content)
- regexp = URI::DEFAULT_PARSER.make_regexp(ALLOWED_SCHEMES)
-
- content.gsub(regexp) { |url| new(url).masked_url }
- rescue Addressable::URI::InvalidURIError
- content.gsub(regexp, '')
+ content.gsub(URI_REGEXP) do |url|
+ new(url).masked_url
+ rescue Addressable::URI::InvalidURIError
+ ''
+ end
end
def self.valid?(url, allowed_schemes: ALLOWED_SCHEMES)
@@ -37,34 +59,45 @@ module Gitlab
@url = parse_url(url)
end
+ def credentials
+ @credentials ||= { user: @url.user.presence, password: @url.password.presence }
+ end
+
+ def user
+ credentials[:user]
+ end
+
def sanitized_url
- @sanitized_url ||= safe_url.to_s
+ safe_url = @url.dup
+ safe_url.password = nil
+ safe_url.user = nil
+ reverse_schemify(safe_url.to_s)
end
+ strong_memoize_attr :sanitized_url
def masked_url
url = @url.dup
url.password = "*****" if url.password.present?
url.user = "*****" if url.user.present?
- url.to_s
- end
-
- def credentials
- @credentials ||= { user: @url.user.presence, password: @url.password.presence }
- end
-
- def user
- credentials[:user]
+ reverse_schemify(url.to_s)
end
+ strong_memoize_attr :masked_url
def full_url
- @full_url ||= generate_full_url.to_s
+ return reverse_schemify(@url.to_s) unless valid_credentials?
+
+ url = @url.dup
+ url.password = encode_percent(credentials[:password]) if credentials[:password].present?
+ url.user = encode_percent(credentials[:user]) if credentials[:user].present?
+ reverse_schemify(url.to_s)
end
+ strong_memoize_attr :full_url
private
def parse_url(url)
- url = url.to_s.strip
- match = url.match(%r{\A(?:git|ssh|http(?:s?))\://(?:(.+)(?:@))?(.+)})
+ url = schemify(url.to_s.strip)
+ match = url.match(%r{\A(?:(?:#{SCHEMIFIED_SCHEME}|git|ssh|http(?:s?)):)?//(?:(.+)(?:@))?(.+)}o)
raw_credentials = match[1] if match
if raw_credentials.present?
@@ -83,24 +116,19 @@ module Gitlab
url
end
- def generate_full_url
- return @url unless valid_credentials?
-
- @url.dup.tap do |generated|
- generated.password = encode_percent(credentials[:password]) if credentials[:password].present?
- generated.user = encode_percent(credentials[:user]) if credentials[:user].present?
- end
+ def schemify(url)
+ # Prepend the placeholder scheme unless the URL already has a scheme or is protocol-relative (starts with //)
+ url.prepend(SCHEMIFY_PLACEHOLDER) unless url.starts_with?(%r{(?:#{URI::REGEXP::PATTERN::SCHEME}:)?//}o)
+ url
end
- def safe_url
- safe_url = @url.dup
- safe_url.password = nil
- safe_url.user = nil
- safe_url
+ def reverse_schemify(url)
+ url.slice!(SCHEMIFY_PLACEHOLDER) if url.starts_with?(SCHEMIFY_PLACEHOLDER)
+ url
end
def valid_credentials?
- credentials && credentials.is_a?(Hash) && credentials.any?
+ credentials.is_a?(Hash) && credentials.values.any?
end
def encode_percent(string)