diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2023-03-29 23:49:37 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2023-03-29 23:50:02 +0000 |
commit | 5f3e1225fc725ed303f3dee989e5b84fafb307d8 (patch) | |
tree | 8f56b3905e0e28a873b50940b66ffcddd7bcbd0a /lib/gitlab/url_sanitizer.rb | |
parent | 56ff640a2f919e9d0e450964081381a8eccef5e4 (diff) | |
download | gitlab-ce-5f3e1225fc725ed303f3dee989e5b84fafb307d8.tar.gz |
Add latest changes from gitlab-org/security/gitlab@15-10-stable-ee
Diffstat (limited to 'lib/gitlab/url_sanitizer.rb')
-rw-r--r-- | lib/gitlab/url_sanitizer.rb | 90 |
1 files changed, 59 insertions, 31 deletions
diff --git a/lib/gitlab/url_sanitizer.rb b/lib/gitlab/url_sanitizer.rb index e3bf11b00b4..79e124a58f5 100644 --- a/lib/gitlab/url_sanitizer.rb +++ b/lib/gitlab/url_sanitizer.rb @@ -2,15 +2,37 @@ module Gitlab class UrlSanitizer + include Gitlab::Utils::StrongMemoize + ALLOWED_SCHEMES = %w[http https ssh git].freeze ALLOWED_WEB_SCHEMES = %w[http https].freeze + SCHEMIFIED_SCHEME = 'glschemelessuri' + SCHEMIFY_PLACEHOLDER = "#{SCHEMIFIED_SCHEME}://".freeze + # URI::DEFAULT_PARSER.make_regexp will only match URLs with schemes or + # relative URLs. This section will match schemeless URIs with userinfo + # e.g. user:pass@gitlab.com but will not match scp-style URIs e.g. + # user@server:path/to/file) + # + # The userinfo part is very loose compared to URI's implementation so we + # also match non-escaped userinfo e.g foo:b?r@gitlab.com which should be + # encoded as foo:b%3Fr@gitlab.com + URI_REGEXP = %r{ + (?: + #{URI::DEFAULT_PARSER.make_regexp(ALLOWED_SCHEMES)} + | + (?:(?:(?!@)[%#{URI::REGEXP::PATTERN::UNRESERVED}#{URI::REGEXP::PATTERN::RESERVED}])+(?:@)) + (?# negative lookahead ensures this isn't an SCP-style URL: [host]:[rel_path|abs_path] server:path/to/file) + (?!#{URI::REGEXP::PATTERN::HOST}:(?:#{URI::REGEXP::PATTERN::REL_PATH}|#{URI::REGEXP::PATTERN::ABS_PATH})) + #{URI::REGEXP::PATTERN::HOSTPORT} + ) + }x def self.sanitize(content) - regexp = URI::DEFAULT_PARSER.make_regexp(ALLOWED_SCHEMES) - - content.gsub(regexp) { |url| new(url).masked_url } - rescue Addressable::URI::InvalidURIError - content.gsub(regexp, '') + content.gsub(URI_REGEXP) do |url| + new(url).masked_url + rescue Addressable::URI::InvalidURIError + '' + end end def self.valid?(url, allowed_schemes: ALLOWED_SCHEMES) @@ -37,34 +59,45 @@ module Gitlab @url = parse_url(url) end + def credentials + @credentials ||= { user: @url.user.presence, password: @url.password.presence } + end + + def user + credentials[:user] + end + def sanitized_url - @sanitized_url ||= safe_url.to_s + safe_url = @url.dup + safe_url.password = nil + safe_url.user = nil + reverse_schemify(safe_url.to_s) end + strong_memoize_attr :sanitized_url def masked_url url = @url.dup url.password = "*****" if url.password.present? url.user = "*****" if url.user.present? - url.to_s - end - - def credentials - @credentials ||= { user: @url.user.presence, password: @url.password.presence } - end - - def user - credentials[:user] + reverse_schemify(url.to_s) end + strong_memoize_attr :masked_url def full_url - @full_url ||= generate_full_url.to_s + return reverse_schemify(@url.to_s) unless valid_credentials? + + url = @url.dup + url.password = encode_percent(credentials[:password]) if credentials[:password].present? + url.user = encode_percent(credentials[:user]) if credentials[:user].present? + reverse_schemify(url.to_s) end + strong_memoize_attr :full_url private def parse_url(url) - url = url.to_s.strip - match = url.match(%r{\A(?:git|ssh|http(?:s?))\://(?:(.+)(?:@))?(.+)}) + url = schemify(url.to_s.strip) + match = url.match(%r{\A(?:(?:#{SCHEMIFIED_SCHEME}|git|ssh|http(?:s?)):)?//(?:(.+)(?:@))?(.+)}o) raw_credentials = match[1] if match if raw_credentials.present? @@ -83,24 +116,19 @@ module Gitlab url end - def generate_full_url - return @url unless valid_credentials? - - @url.dup.tap do |generated| - generated.password = encode_percent(credentials[:password]) if credentials[:password].present? - generated.user = encode_percent(credentials[:user]) if credentials[:user].present? - end + def schemify(url) + # Prepend the placeholder scheme unless the URL has a scheme or is relative + url.prepend(SCHEMIFY_PLACEHOLDER) unless url.starts_with?(%r{(?:#{URI::REGEXP::PATTERN::SCHEME}:)?//}o) + url end - def safe_url - safe_url = @url.dup - safe_url.password = nil - safe_url.user = nil - safe_url + def reverse_schemify(url) + url.slice!(SCHEMIFY_PLACEHOLDER) if url.starts_with?(SCHEMIFY_PLACEHOLDER) + url end def valid_credentials? - credentials && credentials.is_a?(Hash) && credentials.any? + credentials.is_a?(Hash) && credentials.values.any? end def encode_percent(string) |