From 6d09e946d22727ce595aeb382685292a1ad8f5a8 Mon Sep 17 00:00:00 2001 From: James Lopez Date: Fri, 8 Jul 2016 10:44:07 +0200 Subject: import_url migration performance improvements Nullifying empty import_urls upfront so the number of projects with import_url not NULL decreases to 1/5. Also, now processing batches in blocks of 1000, with a threaded process - a bit experimental. --- ...20160620110927_fix_no_validatable_import_url.rb | 34 ++++++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'db') diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb index 82a616c62d9..02ff1962e3f 100644 --- a/db/migrate/20160620110927_fix_no_validatable_import_url.rb +++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb @@ -11,7 +11,7 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration attr_reader :results, :query - def initialize(batch_size: 100, query:) + def initialize(batch_size: 1000, query:) @offset = 0 @batch_size = batch_size @query = query @@ -58,22 +58,40 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration return end + say('Nullifying empty import URLs') + + nullify_empty_urls + say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.') - invalid_import_url_project_ids.each { |project_id| cleanup_import_url(project_id) } + process_invalid_import_urls end - def invalid_import_url_project_ids - ids = [] + def process_invalid_import_urls + @threads = [] batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL") while batches.next? + project_ids = [] + batches.results.each do |result| - ids << result['id'] unless valid_url?(result['import_url']) + project_ids << result['id'] unless valid_url?(result['import_url']) end + + process_batch(project_ids) end - ids + @threads.each(&:join) + end + + def process_batch(project_ids) + @threads << Thread.new do + begin + project_ids.each { |project_id| cleanup_import_url(project_id) } + ensure + ActiveRecord::Base.connection.close + end + end end def valid_url?(url) @@ -83,4 +101,8 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration def cleanup_import_url(project_id) execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}") end + + def nullify_empty_urls + execute("UPDATE projects SET import_url = NULL WHERE import_url = ''") + end end -- cgit v1.2.1 From 2c6fe72265d250e47c03f27dc274b59d3e7e93f5 Mon Sep 17 00:00:00 2001 From: James Lopez Date: Fri, 8 Jul 2016 11:00:30 +0200 Subject: fix thread join issue --- db/migrate/20160620110927_fix_no_validatable_import_url.rb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'db') diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb index 02ff1962e3f..a3f5073d511 100644 --- a/db/migrate/20160620110927_fix_no_validatable_import_url.rb +++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb @@ -68,7 +68,6 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration end def process_invalid_import_urls - @threads = [] batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL") while batches.next? @@ -81,17 +80,16 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration process_batch(project_ids) end - @threads.each(&:join) end def process_batch(project_ids) - @threads << Thread.new do + Thread.new do begin project_ids.each { |project_id| cleanup_import_url(project_id) } ensure ActiveRecord::Base.connection.close end - end + end.join end def valid_url?(url) -- cgit v1.2.1