summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Speicher <robert@gitlab.com>2016-07-06 15:06:01 +0000
committerRobert Speicher <robert@gitlab.com>2016-07-06 15:06:01 +0000
commitbe018ba8c4f61babfea494a3946df9931d476a8a (patch)
treecf5acc63374a7a570ae03deaf1800b183806be07
parent400f9f72233c6c5390367a95bf11ebee09c86d2c (diff)
parent54a50bf81d7bb304adaedffd8eb3e0bc0fc348a9 (diff)
downloadgitlab-ce-be018ba8c4f61babfea494a3946df9931d476a8a.tar.gz
Merge branch 'fix/import-url-validator' into 'master'
Fixing URL validation for import_url on projects Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/17536 This MR fixes problems related to bypassing `import_url` validation on projects. This makes sure the URL is properly validated so we don't enter crap and fail while running workers that handle this URL. It also adds a migration to fix current invalid `import_url`s See merge request !4753
-rw-r--r--CHANGELOG1
-rw-r--r--app/models/project.rb6
-rw-r--r--app/validators/addressable_url_validator.rb45
-rw-r--r--db/migrate/20160620110927_fix_no_validatable_import_url.rb86
-rw-r--r--lib/gitlab/url_sanitizer.rb10
-rw-r--r--spec/models/project_spec.rb12
6 files changed, 156 insertions, 4 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 8ef934bf80d..406dec09e79 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -37,6 +37,7 @@ v 8.10.0 (unreleased)
- Metrics for Rouge::Plugins::Redcarpet and Rouge::Formatters::HTMLGitlab
- RailsCache metris now includes fetch_hit/fetch_miss and read_hit/read_miss info.
- Allow [ci skip] to be in any case and allow [skip ci]. !4785 (simon_w)
+ - Set import_url validation to be more strict
- Add basic system information like memory and disk usage to the admin panel
- Don't garbage collect commits that have related DB records like comments
- More descriptive message for git hooks and file locks
diff --git a/app/models/project.rb b/app/models/project.rb
index e5fae15cb19..029026a4e56 100644
--- a/app/models/project.rb
+++ b/app/models/project.rb
@@ -162,9 +162,7 @@ class Project < ActiveRecord::Base
validates :namespace, presence: true
validates_uniqueness_of :name, scope: :namespace_id
validates_uniqueness_of :path, scope: :namespace_id
- validates :import_url,
- url: { protocols: %w(ssh git http https) },
- if: :external_import?
+ validates :import_url, addressable_url: true, if: :external_import?
validates :star_count, numericality: { greater_than_or_equal_to: 0 }
validate :check_limit, on: :create
validate :avatar_type,
@@ -463,6 +461,8 @@ class Project < ActiveRecord::Base
end
def import_url=(value)
+ return super(value) unless Gitlab::UrlSanitizer.valid?(value)
+
import_url = Gitlab::UrlSanitizer.new(value)
create_or_update_import_data(credentials: import_url.credentials)
super(import_url.sanitized_url)
diff --git a/app/validators/addressable_url_validator.rb b/app/validators/addressable_url_validator.rb
new file mode 100644
index 00000000000..09bfa613cbe
--- /dev/null
+++ b/app/validators/addressable_url_validator.rb
@@ -0,0 +1,45 @@
+# AddressableUrlValidator
+#
+# Custom validator for URLs. This is a stricter version of UrlValidator - it also checks
+# for using the right protocol, but it actually parses the URL checking for any syntax errors.
+# The regex is also different from `URI` as we use `Addressable::URI` here.
+#
+# By default, only URLs for http, https, ssh, and git protocols will be considered valid.
+# Provide a `:protocols` option to configure accepted protocols.
+#
+# Example:
+#
+# class User < ActiveRecord::Base
+# validates :personal_url, addressable_url: true
+#
+# validates :ftp_url, addressable_url: { protocols: %w(ftp) }
+#
+# validates :git_url, addressable_url: { protocols: %w(http https ssh git) }
+# end
+#
+class AddressableUrlValidator < ActiveModel::EachValidator
+ DEFAULT_OPTIONS = { protocols: %w(http https ssh git) }
+
+ def validate_each(record, attribute, value)
+ unless valid_url?(value)
+ record.errors.add(attribute, "must be a valid URL")
+ end
+ end
+
+ private
+
+ def valid_url?(value)
+ return false unless value
+
+ valid_protocol?(value) && valid_uri?(value)
+ end
+
+ def valid_uri?(value)
+ Gitlab::UrlSanitizer.valid?(value)
+ end
+
+ def valid_protocol?(value)
+ options = DEFAULT_OPTIONS.merge(self.options)
+ value =~ /\A#{URI.regexp(options[:protocols])}\z/
+ end
+end
diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb
new file mode 100644
index 00000000000..82a616c62d9
--- /dev/null
+++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb
@@ -0,0 +1,86 @@
+# Updates project records containing invalid URLs using the AddressableUrlValidator.
+# This is optimized assuming the number of invalid records is low, but
+# we still need to loop through all the projects with an +import_url+
+# so we use batching for the latter.
+#
+# This migration is non-reversible as we would have to keep the old data.
+
+class FixNoValidatableImportUrl < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+ class SqlBatches
+
+ attr_reader :results, :query
+
+ def initialize(batch_size: 100, query:)
+ @offset = 0
+ @batch_size = batch_size
+ @query = query
+ @results = []
+ end
+
+ def next?
+ @results = ActiveRecord::Base.connection.exec_query(batched_sql)
+ @offset += @batch_size
+ @results.any?
+ end
+
+ private
+
+ def batched_sql
+ "#{@query} LIMIT #{@batch_size} OFFSET #{@offset}"
+ end
+ end
+
+ # AddressableValidator - Snapshot of AddressableUrlValidator
+ module AddressableUrlValidatorSnap
+ extend self
+
+ def valid_url?(value)
+ return false unless value
+
+ valid_uri?(value) && valid_protocol?(value)
+ rescue Addressable::URI::InvalidURIError
+ false
+ end
+
+ def valid_uri?(value)
+ Addressable::URI.parse(value).is_a?(Addressable::URI)
+ end
+
+ def valid_protocol?(value)
+ value =~ /\A#{URI.regexp(%w(http https ssh git))}\z/
+ end
+ end
+
+ def up
+ unless defined?(Addressable::URI::InvalidURIError)
+ say('Skipping cleaning up invalid import URLs as class from Addressable is missing')
+ return
+ end
+
+ say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.')
+
+ invalid_import_url_project_ids.each { |project_id| cleanup_import_url(project_id) }
+ end
+
+ def invalid_import_url_project_ids
+ ids = []
+ batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL")
+
+ while batches.next?
+ batches.results.each do |result|
+ ids << result['id'] unless valid_url?(result['import_url'])
+ end
+ end
+
+ ids
+ end
+
+ def valid_url?(url)
+ AddressableUrlValidatorSnap.valid_url?(url)
+ end
+
+ def cleanup_import_url(project_id)
+ execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}")
+ end
+end
diff --git a/lib/gitlab/url_sanitizer.rb b/lib/gitlab/url_sanitizer.rb
index 7d02fe3c971..86ed18fb50d 100644
--- a/lib/gitlab/url_sanitizer.rb
+++ b/lib/gitlab/url_sanitizer.rb
@@ -6,8 +6,16 @@ module Gitlab
content.gsub(regexp) { |url| new(url).masked_url }
end
+ def self.valid?(url)
+ Addressable::URI.parse(url.strip)
+
+ true
+ rescue Addressable::URI::InvalidURIError
+ false
+ end
+
def initialize(url, credentials: nil)
- @url = Addressable::URI.parse(url)
+ @url = Addressable::URI.parse(url.strip)
@credentials = credentials
end
diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb
index a8c777d1e3e..2e89d6de3a2 100644
--- a/spec/models/project_spec.rb
+++ b/spec/models/project_spec.rb
@@ -129,6 +129,18 @@ describe Project, models: true do
expect(project2.errors[:repository_storage].first).to match(/is not included in the list/)
end
end
+
+ it 'should not allow an invalid URI as import_url' do
+ project2 = build(:project, import_url: 'invalid://')
+
+ expect(project2).not_to be_valid
+ end
+
+ it 'should allow a valid URI as import_url' do
+ project2 = build(:project, import_url: 'ssh://test@gitlab.com/project.git')
+
+ expect(project2).to be_valid
+ end
end
describe 'default_scope' do