From 896e09d055979cdfe6e20a8b5939c9a263f7e48a Mon Sep 17 00:00:00 2001 From: James Lopez Date: Mon, 20 Jun 2016 15:31:03 +0200 Subject: started working on a migration for projects that have current import_url issues --- ...20160620110927_fix_no_validatable_import_url.rb | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 db/migrate/20160620110927_fix_no_validatable_import_url.rb (limited to 'db') diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb new file mode 100644 index 00000000000..e56a8a0c853 --- /dev/null +++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb @@ -0,0 +1,52 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class FixNoValidatableImportUrl < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + class SqlBatches + + attr_reader :results, :query + + def initialize(batch_size: 100, query:) + @offset = 0 + @batch_size = batch_size + @query = query + @results = [] + end + + def next + @results = ActiveRecord::Base.connection.execute(batched_sql) + @offset += @batch_size + @results.any? + end + + private + + def batched_sql + "#{@query} OFFSET #{@offset} LIMIT #{@batch_size}" + end + end + + def up + invalid_import_url_project_ids.each { |project_id| cleanup_import_url(project_id) } + end + + def invalid_import_url_project_ids + ids = [] + batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL") + + while batches.nexts + ids += batches.results.map { |result| invalid_url?(result[:import_url]) ? result[:id] : nil } + end + + ids.compact + end + + def invalid_url?(url) + AddressableUrlValidator.new({ attributes: 1 }).valid_url?(url) + end + + def cleanup_import_url(project_id) + execute("UPDATE projects SET mirror = false, import_url = NULL WHERE id = #{project_id}") + end +end -- cgit v1.2.1 From 6d763831d00027600e4da9807e6be3afb47abd4b Mon Sep 17 00:00:00 2001 From: James Lopez Date: Mon, 20 Jun 2016 17:20:53 +0200 Subject: fixed a few MySQL issues and added changelog --- ...20160620110927_fix_no_validatable_import_url.rb | 53 +++++++++++++++++----- 1 file changed, 42 insertions(+), 11 deletions(-) (limited to 'db') diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb index e56a8a0c853..9cb84faaec1 100644 --- a/db/migrate/20160620110927_fix_no_validatable_import_url.rb +++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb @@ -1,5 +1,9 @@ -# See http://doc.gitlab.com/ce/development/migration_style_guide.html -# for more information on how to write migrations for GitLab. +# Updates project records containing invalid URLs using the AddressableUrlValidator. +# This is optimized assuming the number of invalid records is low, but +# we still need to loop through all the projects with an +import_url+ +# so we use batching for the latter. +# +# This migration is non-reversible as we would have to keep the old data. class FixNoValidatableImportUrl < ActiveRecord::Migration include Gitlab::Database::MigrationHelpers @@ -14,8 +18,8 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration @results = [] end - def next - @results = ActiveRecord::Base.connection.execute(batched_sql) + def next? + @results = ActiveRecord::Base.connection.exec_query(batched_sql) @offset += @batch_size @results.any? end @@ -23,11 +27,36 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration private def batched_sql - "#{@query} OFFSET #{@offset} LIMIT #{@batch_size}" + "#{@query} LIMIT #{@batch_size} OFFSET #{@offset}" + end + end + + # AddressableValidator - Snapshot of AddressableUrlValidator + module AddressableUrlValidatorSnap + extend self + + def valid_url?(value) + return false unless value + + value.strip! + + valid_uri?(value) && valid_protocol?(value) + rescue Addressable::URI::InvalidURIError + false + end + + def valid_uri?(value) + Addressable::URI.parse(value).is_a?(Addressable::URI) + end + + def valid_protocol?(value) + !!(value =~ /\A#{URI.regexp(%w(http https ssh git))}\z/) end end def up + say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.') + invalid_import_url_project_ids.each { |project_id| cleanup_import_url(project_id) } end @@ -35,18 +64,20 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration ids = [] batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL") - while batches.nexts - ids += batches.results.map { |result| invalid_url?(result[:import_url]) ? result[:id] : nil } + while batches.next? + batches.results.each do |result| + ids << result['id'] unless valid_url?(result['import_url']) + end end - ids.compact + ids end - def invalid_url?(url) - AddressableUrlValidator.new({ attributes: 1 }).valid_url?(url) + def valid_url?(url) + AddressableUrlValidatorSnap.valid_url?(url) end def cleanup_import_url(project_id) - execute("UPDATE projects SET mirror = false, import_url = NULL WHERE id = #{project_id}") + execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}") end end -- cgit v1.2.1 From 5b893d603dd68f263129523f13e8eb68b67fe790 Mon Sep 17 00:00:00 2001 From: James Lopez Date: Thu, 30 Jun 2016 13:17:37 +0200 Subject: few changes based on feedback --- db/migrate/20160620110927_fix_no_validatable_import_url.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'db') diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb index 9cb84faaec1..e111691ea3c 100644 --- a/db/migrate/20160620110927_fix_no_validatable_import_url.rb +++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb @@ -38,8 +38,6 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration def valid_url?(value) return false unless value - value.strip! - valid_uri?(value) && valid_protocol?(value) rescue Addressable::URI::InvalidURIError false @@ -50,11 +48,13 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration end def valid_protocol?(value) - !!(value =~ /\A#{URI.regexp(%w(http https ssh git))}\z/) + value =~ /\A#{URI.regexp(%w(http https ssh git))}\z/ end end def up + return unless defined?(Addressable::URI::InvalidURIError) + say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.') invalid_import_url_project_ids.each { |project_id| cleanup_import_url(project_id) } -- cgit v1.2.1 From ef5713546bacc653f598eb692b728e35abdb8ab7 Mon Sep 17 00:00:00 2001 From: James Lopez Date: Thu, 30 Jun 2016 17:22:56 +0200 Subject: few more changes from suggestions --- db/migrate/20160620110927_fix_no_validatable_import_url.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'db') diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb index e111691ea3c..3e3837ab7e9 100644 --- a/db/migrate/20160620110927_fix_no_validatable_import_url.rb +++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb @@ -53,7 +53,10 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration end def up - return unless defined?(Addressable::URI::InvalidURIError) + unless defined?(Addressable::URI::InvalidURIError) + say('Skipping cleaning up invalid import URLs as class from Addressable iss missing') + return + end say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.') -- cgit v1.2.1 From 26ce833a2143485bb0485c8b01d78561adf7c86d Mon Sep 17 00:00:00 2001 From: James Lopez Date: Thu, 30 Jun 2016 18:19:34 +0200 Subject: typo --- db/migrate/20160620110927_fix_no_validatable_import_url.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'db') diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb index 3e3837ab7e9..82a616c62d9 100644 --- a/db/migrate/20160620110927_fix_no_validatable_import_url.rb +++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb @@ -54,7 +54,7 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration def up unless defined?(Addressable::URI::InvalidURIError) - say('Skipping cleaning up invalid import URLs as class from Addressable iss missing') + say('Skipping cleaning up invalid import URLs as class from Addressable is missing') return end -- cgit v1.2.1