summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Sean McGivern <sean@mcgivern.me.uk> 2018-01-04 10:14:06 +0000
committer Sean McGivern <sean@mcgivern.me.uk> 2018-01-04 10:14:06 +0000
commit ac409fb44402622dfd6abb076f7a85df4b27d39d (patch)
tree e32e266d1ab620e70486926f31bafcc4e3e7ae88 /lib
parent b1e1990ee263bcae73f0e55526a55cff66103220 (diff)
parent 78d22fb20db14c90861318b9f316466fbf002114 (diff)
download gitlab-ce-ac409fb44402622dfd6abb076f7a85df4b27d39d.tar.gz
Merge branch 'change-issues-closed-at-background-migration' into 'master'
Use a background migration for migrating issues.closed_at. See merge request gitlab-org/gitlab-ce!16083
Diffstat (limited to 'lib')
-rw-r--r-- lib/gitlab/background_migration/cleanup_concurrent_type_change.rb | 54
-rw-r--r-- lib/gitlab/background_migration/copy_column.rb | 39
-rw-r--r-- lib/gitlab/database/migration_helpers.rb | 121
3 files changed, 204 insertions, 10 deletions
diff --git a/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb
new file mode 100644
index 00000000000..de622f657b2
--- /dev/null
+++ b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # Background migration for cleaning up a concurrent column type change.
+    class CleanupConcurrentTypeChange
+      include Database::MigrationHelpers
+
+      RESCHEDULE_DELAY = 10.minutes
+
+      # table - The name of the table the migration is performed for.
+      # old_column - The name of the old (to drop) column.
+      # new_column - The name of the new column.
+      def perform(table, old_column, new_column)
+        return unless column_exists?(table, new_column)
+
+        # Rows that still hold a value in the old column but not in the new one.
+        rows_to_migrate = define_model_for(table)
+          .where(new_column => nil)
+          .where.not(old_column => nil)
+
+        if rows_to_migrate.any?
+          # Not everything has been copied yet, so check again later.
+          BackgroundMigrationWorker.perform_in(
+            RESCHEDULE_DELAY, 'CleanupConcurrentTypeChange',
+            [table, old_column, new_column]
+          )
+        else
+          cleanup_concurrent_column_type_change(table, old_column)
+        end
+      end
+
+      # These methods are necessary so we can re-use the migration helpers in
+      # this class.
+      def connection
+        ActiveRecord::Base.connection
+      end
+
+      def method_missing(name, *args, &block)
+        connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend
+      end
+
+      def respond_to_missing?(*args)
+        connection.respond_to?(*args) || super
+      end
+
+      def define_model_for(table)
+        Class.new(ActiveRecord::Base) do
+          self.table_name = table
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/background_migration/copy_column.rb b/lib/gitlab/background_migration/copy_column.rb
new file mode 100644
index 00000000000..a2cb215c230
--- /dev/null
+++ b/lib/gitlab/background_migration/copy_column.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # CopyColumn is a small, reusable background migration. It sets one
+    # column of a table to the value of another column of that same table,
+    # for all rows whose `id` falls within a given range.
+    #
+    # The table being migrated _must_ have an `id` column, because rows are
+    # selected using `id BETWEEN start_id AND end_id`.
+    class CopyColumn
+      # table - The name of the table that holds both columns.
+      # copy_from - The column to read the data from.
+      # copy_to - The column to write the data to.
+      # start_id - The first ID of the range of rows to update.
+      # end_id - The last ID of the range of rows to update.
+      def perform(table, copy_from, copy_to, start_id, end_id)
+        return unless connection.column_exists?(table, copy_to)
+
+        target_table = connection.quote_table_name(table)
+        source = connection.quote_column_name(copy_from)
+        destination = connection.quote_column_name(copy_to)
+
+        # Raw SQL is used on purpose: this job may run very often, and defining
+        # models on the fly would cause many unnecessary schema information queries.
+        statement = <<-SQL.strip_heredoc
+          UPDATE #{target_table}
+          SET #{destination} = #{source}
+          WHERE id BETWEEN #{start_id} AND #{end_id}
+        SQL
+        connection.execute(statement)
+      end
+
+      def connection
+        ActiveRecord::Base.connection
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 3f65bc912de..33171f83692 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -385,10 +385,27 @@ module Gitlab
# necessary since we copy over old values further down.
change_column_default(table, new, old_col.default) if old_col.default
- trigger_name = rename_trigger_name(table, old, new)
+ install_rename_triggers(table, old, new)
+
+ update_column_in_batches(table, new, Arel::Table.new(table)[old])
+
+ change_column_null(table, new, false) unless old_col.null
+
+ copy_indexes(table, old, new)
+ copy_foreign_keys(table, old, new)
+ end
+
+ # Installs triggers in a table that keep a new column in sync with an old
+ # one.
+ #
+ # table - The name of the table to install the trigger in.
+ # old_column - The name of the old column.
+ # new_column - The name of the new column.
+ def install_rename_triggers(table, old_column, new_column)
+ trigger_name = rename_trigger_name(table, old_column, new_column)
quoted_table = quote_table_name(table)
- quoted_old = quote_column_name(old)
- quoted_new = quote_column_name(new)
+ quoted_old = quote_column_name(old_column)
+ quoted_new = quote_column_name(new_column)
if Database.postgresql?
install_rename_triggers_for_postgresql(trigger_name, quoted_table,
@@ -397,13 +414,6 @@ module Gitlab
install_rename_triggers_for_mysql(trigger_name, quoted_table,
quoted_old, quoted_new)
end
-
- update_column_in_batches(table, new, Arel::Table.new(table)[old])
-
- change_column_null(table, new, false) unless old_col.null
-
- copy_indexes(table, old, new)
- copy_foreign_keys(table, old, new)
end
# Changes the type of a column concurrently.
@@ -455,6 +465,97 @@ module Gitlab
remove_column(table, old)
end
+ # Changes the column type of a table using a background migration.
+ #
+ # Because this method uses a background migration it's more suitable for
+ # large tables. For small tables it's better to use
+ # `change_column_type_concurrently` since it can complete its work in a
+ # much shorter amount of time and doesn't rely on Sidekiq.
+ #
+ # Example usage:
+ #
+ # class Issue < ActiveRecord::Base
+ # self.table_name = 'issues'
+ #
+ # include EachBatch
+ #
+ # def self.to_migrate
+ # where('closed_at IS NOT NULL')
+ # end
+ # end
+ #
+ # change_column_type_using_background_migration(
+ # Issue.to_migrate,
+ # :closed_at,
+ # :datetime_with_timezone
+ # )
+ #
+ # Reverting a migration like this is done exactly the same way, just with
+ # a different type to migrate to (e.g. `:datetime` in the above example).
+ #
+ # relation - An ActiveRecord relation to use for scheduling jobs and
+ # figuring out what table we're modifying. This relation _must_
+ # have the EachBatch module included.
+ #
+ # column - The name of the column for which the type will be changed.
+ #
+ # new_type - The new type of the column.
+ #
+ # batch_size - The number of rows to schedule in a single background
+ # migration.
+ #
+ # interval - The time interval between every background migration.
+ def change_column_type_using_background_migration(
+   relation,
+   column,
+   new_type,
+   batch_size: 10_000,
+   interval: 10.minutes
+ )
+   each_batch_included = relation.model < EachBatch
+   message = 'The relation must include the EachBatch module'
+   raise TypeError, message unless each_batch_included
+
+   new_column = "#{column}_for_type_change"
+   table_name = relation.table_name
+   last_index = 0
+
+   add_column(table_name, new_column, new_type)
+   install_rename_triggers(table_name, column, new_column)
+
+   # Every batch gets its own CopyColumn job covering that batch's ID range.
+   # The jobs are spaced `interval` apart and copy the old column into the
+   # temporary one.
+   relation.each_batch(of: batch_size) do |batch, index|
+     last_index = index
+     first_id, last_id = batch.pluck('MIN(id), MAX(id)').first
+     copy_args = [table_name, column, new_column, first_id, last_id]
+
+     BackgroundMigrationWorker.perform_in(
+       index * interval, 'CopyColumn', copy_args
+     )
+   end
+
+   # The cleanup job is (initially) scheduled for 1 hour after the point at
+   # which the last copy job is due.
+   cleanup_delay = (last_index * interval) + 1.hour
+   BackgroundMigrationWorker.perform_in(
+     cleanup_delay, 'CleanupConcurrentTypeChange',
+     [table_name, column, new_column]
+   )
+
+   return unless perform_background_migration_inline?
+
+   # When inline execution is enabled, the scheduled jobs are run right away
+   # so that the schema is up to date immediately.
+   Gitlab::BackgroundMigration.steal('CopyColumn')
+   Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange')
+ end
+
+ def perform_background_migration_inline?
+   [Rails.env.test?, Rails.env.development?].any?
+ end
+
# Performs a concurrent column rename when using PostgreSQL.
def install_rename_triggers_for_postgresql(trigger, table, old, new)
execute <<-EOF.strip_heredoc