summaryrefslogtreecommitdiff
path: root/db
diff options
context:
space:
mode:
authorRémy Coutable <remy@rymai.me>2016-07-11 07:53:13 +0000
committerRémy Coutable <remy@rymai.me>2016-07-11 07:53:13 +0000
commit11ba19e3d2ec5cef69bff1fdf12ee643012d6510 (patch)
treedede0f267f9b097a54de57c6ec57e5b62dac462c /db
parentb0b2301842682ba2e626cdc5b72d5e01a8662d2d (diff)
parent99f7b6d24684dcb9dbff79c8ff08f8c7580dcafe (diff)
downloadgitlab-ce-11ba19e3d2ec5cef69bff1fdf12ee643012d6510.tar.gz
Merge branch 'fix/remove-import-url-migration' into 'master'
Remove slow migration and add fix to sanitize errors Getting rid of the slow migration and preventing the Error to occur Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/19608 See merge request !5176
Diffstat (limited to 'db')
-rw-r--r--db/migrate/20160620110927_fix_no_validatable_import_url.rb106
1 files changed, 0 insertions, 106 deletions
diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb
deleted file mode 100644
index a3f5073d511..00000000000
--- a/db/migrate/20160620110927_fix_no_validatable_import_url.rb
+++ /dev/null
@@ -1,106 +0,0 @@
-# Updates project records containing invalid URLs using the AddressableUrlValidator.
-# This is optimized assuming the number of invalid records is low, but
-# we still need to loop through all the projects with an +import_url+
-# so we use batching for the latter.
-#
-# This migration is non-reversible as we would have to keep the old data.
-
-class FixNoValidatableImportUrl < ActiveRecord::Migration
- include Gitlab::Database::MigrationHelpers
- class SqlBatches
-
- attr_reader :results, :query
-
- def initialize(batch_size: 1000, query:)
- @offset = 0
- @batch_size = batch_size
- @query = query
- @results = []
- end
-
- def next?
- @results = ActiveRecord::Base.connection.exec_query(batched_sql)
- @offset += @batch_size
- @results.any?
- end
-
- private
-
- def batched_sql
- "#{@query} LIMIT #{@batch_size} OFFSET #{@offset}"
- end
- end
-
- # AddressableValidator - Snapshot of AddressableUrlValidator
- module AddressableUrlValidatorSnap
- extend self
-
- def valid_url?(value)
- return false unless value
-
- valid_uri?(value) && valid_protocol?(value)
- rescue Addressable::URI::InvalidURIError
- false
- end
-
- def valid_uri?(value)
- Addressable::URI.parse(value).is_a?(Addressable::URI)
- end
-
- def valid_protocol?(value)
- value =~ /\A#{URI.regexp(%w(http https ssh git))}\z/
- end
- end
-
- def up
- unless defined?(Addressable::URI::InvalidURIError)
- say('Skipping cleaning up invalid import URLs as class from Addressable is missing')
- return
- end
-
- say('Nullifying empty import URLs')
-
- nullify_empty_urls
-
- say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.')
-
- process_invalid_import_urls
- end
-
- def process_invalid_import_urls
- batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL")
-
- while batches.next?
- project_ids = []
-
- batches.results.each do |result|
- project_ids << result['id'] unless valid_url?(result['import_url'])
- end
-
- process_batch(project_ids)
- end
-
- end
-
- def process_batch(project_ids)
- Thread.new do
- begin
- project_ids.each { |project_id| cleanup_import_url(project_id) }
- ensure
- ActiveRecord::Base.connection.close
- end
- end.join
- end
-
- def valid_url?(url)
- AddressableUrlValidatorSnap.valid_url?(url)
- end
-
- def cleanup_import_url(project_id)
- execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}")
- end
-
- def nullify_empty_urls
- execute("UPDATE projects SET import_url = NULL WHERE import_url = ''")
- end
-end