diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/background_migration/cleanup_concurrent_type_change.rb | 54 | ||||
-rw-r--r-- | lib/gitlab/background_migration/copy_column.rb | 39 | ||||
-rw-r--r-- | lib/gitlab/database/migration_helpers.rb | 121 |
3 files changed, 204 insertions, 10 deletions
diff --git a/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb new file mode 100644 index 00000000000..de622f657b2 --- /dev/null +++ b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Background migration for cleaning up a concurrent column rename. + class CleanupConcurrentTypeChange + include Database::MigrationHelpers + + RESCHEDULE_DELAY = 10.minutes + + # table - The name of the table the migration is performed for. + # old_column - The name of the old (to drop) column. + # new_column - The name of the new column. + def perform(table, old_column, new_column) + return unless column_exists?(:issues, new_column) + + rows_to_migrate = define_model_for(table) + .where(new_column => nil) + .where + .not(old_column => nil) + + if rows_to_migrate.any? + BackgroundMigrationWorker.perform_in( + RESCHEDULE_DELAY, + 'CleanupConcurrentTypeChange', + [table, old_column, new_column] + ) + else + cleanup_concurrent_column_type_change(table, old_column) + end + end + + # These methods are necessary so we can re-use the migration helpers in + # this class. + def connection + ActiveRecord::Base.connection + end + + def method_missing(name, *args, &block) + connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend + end + + def respond_to_missing?(*args) + connection.respond_to?(*args) || super + end + + def define_model_for(table) + Class.new(ActiveRecord::Base) do + self.table_name = table + end + end + end + end +end diff --git a/lib/gitlab/background_migration/copy_column.rb b/lib/gitlab/background_migration/copy_column.rb new file mode 100644 index 00000000000..a2cb215c230 --- /dev/null +++ b/lib/gitlab/background_migration/copy_column.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # CopyColumn is a simple (reusable) background migration that can be used to + # update the value of a column based on the value of another column in the + # same table. + # + # For this background migration to work the table that is migrated _has_ to + # have an `id` column as the primary key. + class CopyColumn + # table - The name of the table that contains the columns. + # copy_from - The column containing the data to copy. + # copy_to - The column to copy the data to. + # start_id - The start ID of the range of rows to update. + # end_id - The end ID of the range of rows to update. + def perform(table, copy_from, copy_to, start_id, end_id) + return unless connection.column_exists?(table, copy_to) + + quoted_table = connection.quote_table_name(table) + quoted_copy_from = connection.quote_column_name(copy_from) + quoted_copy_to = connection.quote_column_name(copy_to) + + # We're using raw SQL here since this job may be frequently executed. As + # a result dynamically defining models would lead to many unnecessary + # schema information queries. + connection.execute <<-SQL.strip_heredoc + UPDATE #{quoted_table} + SET #{quoted_copy_to} = #{quoted_copy_from} + WHERE id BETWEEN #{start_id} AND #{end_id} + SQL + end + + def connection + ActiveRecord::Base.connection + end + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 3f65bc912de..33171f83692 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -385,10 +385,27 @@ module Gitlab # necessary since we copy over old values further down. change_column_default(table, new, old_col.default) if old_col.default - trigger_name = rename_trigger_name(table, old, new) + install_rename_triggers(table, old, new) + + update_column_in_batches(table, new, Arel::Table.new(table)[old]) + + change_column_null(table, new, false) unless old_col.null + + copy_indexes(table, old, new) + copy_foreign_keys(table, old, new) + end + + # Installs triggers in a table that keep a new column in sync with an old + # one. + # + # table - The name of the table to install the trigger in. + # old_column - The name of the old column. + # new_column - The name of the new column. + def install_rename_triggers(table, old_column, new_column) + trigger_name = rename_trigger_name(table, old_column, new_column) quoted_table = quote_table_name(table) - quoted_old = quote_column_name(old) - quoted_new = quote_column_name(new) + quoted_old = quote_column_name(old_column) + quoted_new = quote_column_name(new_column) if Database.postgresql? install_rename_triggers_for_postgresql(trigger_name, quoted_table, @@ -397,13 +414,6 @@ module Gitlab install_rename_triggers_for_mysql(trigger_name, quoted_table, quoted_old, quoted_new) end - - update_column_in_batches(table, new, Arel::Table.new(table)[old]) - - change_column_null(table, new, false) unless old_col.null - - copy_indexes(table, old, new) - copy_foreign_keys(table, old, new) end # Changes the type of a column concurrently. @@ -455,6 +465,97 @@ module Gitlab remove_column(table, old) end + # Changes the column type of a table using a background migration. + # + # Because this method uses a background migration it's more suitable for + # large tables. For small tables it's better to use + # `change_column_type_concurrently` since it can complete its work in a + # much shorter amount of time and doesn't rely on Sidekiq. + # + # Example usage: + # + # class Issue < ActiveRecord::Base + # self.table_name = 'issues' + # + # include EachBatch + # + # def self.to_migrate + # where('closed_at IS NOT NULL') + # end + # end + # + # change_column_type_using_background_migration( + # Issue.to_migrate, + # :closed_at, + # :datetime_with_timezone + # ) + # + # Reverting a migration like this is done exactly the same way, just with + # a different type to migrate to (e.g. `:datetime` in the above example). + # + # relation - An ActiveRecord relation to use for scheduling jobs and + # figuring out what table we're modifying. This relation _must_ + # have the EachBatch module included. + # + # column - The name of the column for which the type will be changed. + # + # new_type - The new type of the column. + # + # batch_size - The number of rows to schedule in a single background + # migration. + # + # interval - The time interval between every background migration. + def change_column_type_using_background_migration( + relation, + column, + new_type, + batch_size: 10_000, + interval: 10.minutes + ) + unless relation.model < EachBatch + raise TypeError, 'The relation must include the EachBatch module' + end + + temp_column = "#{column}_for_type_change" + table = relation.table_name + max_index = 0 + + add_column(table, temp_column, new_type) + install_rename_triggers(table, column, temp_column) + + # Schedule the jobs that will copy the data from the old column to the + # new one. + relation.each_batch(of: batch_size) do |batch, index| + start_id, end_id = batch.pluck('MIN(id), MAX(id)').first + max_index = index + + BackgroundMigrationWorker.perform_in( + index * interval, + 'CopyColumn', + [table, column, temp_column, start_id, end_id] + ) + end + + # Schedule the renaming of the column to happen (initially) 1 hour after + # the last batch finished. + BackgroundMigrationWorker.perform_in( + (max_index * interval) + 1.hour, + 'CleanupConcurrentTypeChange', + [table, column, temp_column] + ) + + if perform_background_migration_inline? + # To ensure the schema is up to date immediately we perform the + # migration inline in dev / test environments. + Gitlab::BackgroundMigration.steal('CopyColumn') + Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange') + end + end + + def perform_background_migration_inline? + Rails.env.test? || Rails.env.development? + end + # Performs a concurrent column rename when using PostgreSQL. def install_rename_triggers_for_postgresql(trigger, table, old, new) execute <<-EOF.strip_heredoc |