summaryrefslogtreecommitdiff
path: root/lib/gitlab/database
diff options
context:
space:
mode:
authorYorick Peterse <yorickpeterse@gmail.com>2017-12-21 16:44:07 +0100
committerYorick Peterse <yorickpeterse@gmail.com>2018-01-03 12:28:00 +0100
commit78d22fb20db14c90861318b9f316466fbf002114 (patch)
tree9ac0e16582926c812f56830eab74ea970caea2ac /lib/gitlab/database
parent1dac4271798a3b9ad36c3d985a3f7740cd1c60b3 (diff)
downloadgitlab-ce-78d22fb20db14c90861318b9f316466fbf002114.tar.gz
Use a background migration for issues.closed_at (branch: change-issues-closed-at-background-migration)
In a previous attempt (rolled back in https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/16021) we tried to migrate `issues.closed_at` from timestamp to timestamptz using a regular migration. This had a bad impact on GitLab.com and as such was rolled back. This commit re-implements the original migrations using generic background migrations, allowing us to still migrate the data in a single release but without a negative impact on availability. To ensure the database schema is up to date the background migrations are performed inline in development and test environments. We also make sure to not migrate data that doesn't need migrating in the first place or has already been migrated.
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--lib/gitlab/database/migration_helpers.rb121
1 files changed, 111 insertions, 10 deletions
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 3f65bc912de..33171f83692 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -385,10 +385,27 @@ module Gitlab
# necessary since we copy over old values further down.
change_column_default(table, new, old_col.default) if old_col.default
- trigger_name = rename_trigger_name(table, old, new)
+ install_rename_triggers(table, old, new)
+
+ update_column_in_batches(table, new, Arel::Table.new(table)[old])
+
+ change_column_null(table, new, false) unless old_col.null
+
+ copy_indexes(table, old, new)
+ copy_foreign_keys(table, old, new)
+ end
+
+ # Installs triggers in a table that keep a new column in sync with an old
+ # one.
+ #
+ # table - The name of the table to install the trigger in.
+ # old_column - The name of the old column.
+ # new_column - The name of the new column.
+ def install_rename_triggers(table, old_column, new_column)
+ trigger_name = rename_trigger_name(table, old_column, new_column)
quoted_table = quote_table_name(table)
- quoted_old = quote_column_name(old)
- quoted_new = quote_column_name(new)
+ quoted_old = quote_column_name(old_column)
+ quoted_new = quote_column_name(new_column)
if Database.postgresql?
install_rename_triggers_for_postgresql(trigger_name, quoted_table,
@@ -397,13 +414,6 @@ module Gitlab
install_rename_triggers_for_mysql(trigger_name, quoted_table,
quoted_old, quoted_new)
end
-
- update_column_in_batches(table, new, Arel::Table.new(table)[old])
-
- change_column_null(table, new, false) unless old_col.null
-
- copy_indexes(table, old, new)
- copy_foreign_keys(table, old, new)
end
# Changes the type of a column concurrently.
@@ -455,6 +465,97 @@ module Gitlab
remove_column(table, old)
end
+ # Changes the column type of a table using a background migration.
+ #
+ # Because this method uses a background migration it's more suitable for
+ # large tables. For small tables it's better to use
+ # `change_column_type_concurrently` since it can complete its work in a
+ # much shorter amount of time and doesn't rely on Sidekiq.
+ #
+ # Example usage:
+ #
+ # class Issue < ActiveRecord::Base
+ # self.table_name = 'issues'
+ #
+ # include EachBatch
+ #
+ # def self.to_migrate
+ # where('closed_at IS NOT NULL')
+ # end
+ # end
+ #
+ # change_column_type_using_background_migration(
+ # Issue.to_migrate,
+ # :closed_at,
+ # :datetime_with_timezone
+ # )
+ #
+ # Reverting a migration like this is done exactly the same way, just with
+ # a different type to migrate to (e.g. `:datetime` in the above example).
+ #
+ # relation - An ActiveRecord relation to use for scheduling jobs and
+ # figuring out what table we're modifying. This relation _must_
+ # have the EachBatch module included.
+ #
+ # column - The name of the column for which the type will be changed.
+ #
+ # new_type - The new type of the column.
+ #
+ # batch_size - The number of rows to schedule in a single background
+ # migration.
+ #
+ # interval - The time interval between every background migration.
+ def change_column_type_using_background_migration(
+ relation,
+ column,
+ new_type,
+ batch_size: 10_000,
+ interval: 10.minutes
+ )
+ unless relation.model < EachBatch # the scheduling below relies on relation.each_batch
+ raise TypeError, 'The relation must include the EachBatch module'
+ end
+
+ temp_column = "#{column}_for_type_change"
+ table = relation.table_name
+ max_index = 0 # stays 0 when each_batch yields no batches
+
+ add_column(table, temp_column, new_type)
+ install_rename_triggers(table, column, temp_column)
+
+ # Schedule the jobs that will copy the data from the old column to the new one:
+ # one per batch, delayed by index * interval, given the batch's MIN(id) and MAX(id).
+ relation.each_batch(of: batch_size) do |batch, index|
+ start_id, end_id = batch.pluck('MIN(id), MAX(id)').first
+ max_index = index
+
+ BackgroundMigrationWorker.perform_in(
+ index * interval,
+ 'CopyColumn',
+ [table, column, temp_column, start_id, end_id]
+ )
+ end
+
+ # Schedule the renaming of the column to happen (initially) 1 hour after
+ # the delay given to the last copy job (not after that job has finished).
+ BackgroundMigrationWorker.perform_in(
+ (max_index * interval) + 1.hour,
+ 'CleanupConcurrentTypeChange',
+ [table, column, temp_column]
+ )
+
+ if perform_background_migration_inline?
+ # To ensure the schema is up to date immediately we perform the
+ # migration inline in dev / test environments.
+ Gitlab::BackgroundMigration.steal('CopyColumn')
+ Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange')
+ end
+ end
+
+ def perform_background_migration_inline?
+ Rails.env.test? || Rails.env.development? # true only in the test and development environments
+ end
+
# Performs a concurrent column rename when using PostgreSQL.
def install_rename_triggers_for_postgresql(trigger, table, old, new)
execute <<-EOF.strip_heredoc