diff options
author | Yorick Peterse <yorickpeterse@gmail.com> | 2018-06-26 14:38:52 +0000 |
---|---|---|
committer | Yorick Peterse <yorickpeterse@gmail.com> | 2018-06-26 14:38:52 +0000 |
commit | 2440b7b6f150ad32a96140023755667b54490e6c (patch) | |
tree | 286614d013c885f0080ece5e9cd93c5b90b0b5f0 | |
parent | 338393791f8893c98df6644b61a2d591474737ac (diff) | |
parent | d3d9077830527c21ece7d9dc1a31a94a290afcdc (diff) | |
download | gitlab-ce-2440b7b6f150ad32a96140023755667b54490e6c.tar.gz |
Merge branch 'add-rename-column-background-helper' into 'master'
Add a helper to rename a column using a background migration
Closes #47591
See merge request gitlab-org/gitlab-ce!20180
6 files changed, 237 insertions, 56 deletions
diff --git a/doc/development/what_requires_downtime.md b/doc/development/what_requires_downtime.md index f502866333e..47396666879 100644 --- a/doc/development/what_requires_downtime.md +++ b/doc/development/what_requires_downtime.md @@ -195,22 +195,22 @@ end And that's it, we're done! -## Changing Column Types For Large Tables +## Changing The Schema For Large Tables -While `change_column_type_concurrently` can be used for changing the type of a -column without downtime it doesn't work very well for large tables. Because all -of the work happens in sequence the migration can take a very long time to -complete, preventing a deployment from proceeding. -`change_column_type_concurrently` can also produce a lot of pressure on the -database due to it rapidly updating many rows in sequence. +While `change_column_type_concurrently` and `rename_column_concurrently` can be +used for changing the schema of a table without downtime it doesn't work very +well for large tables. Because all of the work happens in sequence the migration +can take a very long time to complete, preventing a deployment from proceeding. +They can also produce a lot of pressure on the database due to it rapidly +updating many rows in sequence. To reduce database pressure you should instead use -`change_column_type_using_background_migration` when migrating a column in a -large table (e.g. `issues`). This method works similar to -`change_column_type_concurrently` but uses background migration to spread the -work / load over a longer time period, without slowing down deployments. +`change_column_type_using_background_migration` or `rename_column_concurrently` +when migrating a column in a large table (e.g. `issues`). These methods work +similarly to the concurrent counterparts but uses background migration to spread +the work / load over a longer time period, without slowing down deployments. -Usage of this method is fairly simple: +For example, to change the column type using a background migration: ```ruby class ExampleMigration < ActiveRecord::Migration @@ -296,6 +296,15 @@ class MigrateRemainingIssuesClosedAt < ActiveRecord::Migration end ``` +The same applies to `rename_column_using_background_migration`: + +1. Create a migration using the helper, which will schedule background + migrations to spread the writes over a longer period of time. +2. In the next monthly release, create a clean-up migration to steal from the + Sidekiq queues, migrate any missing rows, and cleanup the rename. This + migration should skip the steps after stealing from the Sidekiq queues if the + column has already been renamed. + For more information, see [the documentation on cleaning up background migrations](background_migrations.md#cleaning-up). diff --git a/lib/gitlab/background_migration/cleanup_concurrent_rename.rb b/lib/gitlab/background_migration/cleanup_concurrent_rename.rb new file mode 100644 index 00000000000..d3f366f3480 --- /dev/null +++ b/lib/gitlab/background_migration/cleanup_concurrent_rename.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Background migration for cleaning up a concurrent column rename. + class CleanupConcurrentRename < CleanupConcurrentSchemaChange + RESCHEDULE_DELAY = 10.minutes + + def cleanup_concurrent_schema_change(table, old_column, new_column) + cleanup_concurrent_column_rename(table, old_column, new_column) + end + end + end +end diff --git a/lib/gitlab/background_migration/cleanup_concurrent_schema_change.rb b/lib/gitlab/background_migration/cleanup_concurrent_schema_change.rb new file mode 100644 index 00000000000..54f77f184d5 --- /dev/null +++ b/lib/gitlab/background_migration/cleanup_concurrent_schema_change.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Base class for cleaning up concurrent schema changes. + class CleanupConcurrentSchemaChange + include Database::MigrationHelpers + + # table - The name of the table the migration is performed for. + # old_column - The name of the old (to drop) column. + # new_column - The name of the new column. + def perform(table, old_column, new_column) + return unless column_exists?(table, new_column) + + rows_to_migrate = define_model_for(table) + .where(new_column => nil) + .where + .not(old_column => nil) + + if rows_to_migrate.any? + BackgroundMigrationWorker.perform_in( + RESCHEDULE_DELAY, + self.class.name, + [table, old_column, new_column] + ) + else + cleanup_concurrent_schema_change(table, old_column, new_column) + end + end + + # These methods are necessary so we can re-use the migration helpers in + # this class. + def connection + ActiveRecord::Base.connection + end + + def method_missing(name, *args, &block) + connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend + end + + def respond_to_missing?(*args) + connection.respond_to?(*args) || super + end + + def define_model_for(table) + Class.new(ActiveRecord::Base) do + self.table_name = table + end + end + end + end +end diff --git a/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb index de622f657b2..48411095dbb 100644 --- a/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb +++ b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb @@ -2,52 +2,12 @@ module Gitlab module BackgroundMigration - # Background migration for cleaning up a concurrent column rename. - class CleanupConcurrentTypeChange - include Database::MigrationHelpers - + # Background migration for cleaning up a concurrent column type changeb. + class CleanupConcurrentTypeChange < CleanupConcurrentSchemaChange RESCHEDULE_DELAY = 10.minutes - # table - The name of the table the migration is performed for. - # old_column - The name of the old (to drop) column. - # new_column - The name of the new column. - def perform(table, old_column, new_column) - return unless column_exists?(:issues, new_column) - - rows_to_migrate = define_model_for(table) - .where(new_column => nil) - .where - .not(old_column => nil) - - if rows_to_migrate.any? - BackgroundMigrationWorker.perform_in( - RESCHEDULE_DELAY, - 'CleanupConcurrentTypeChange', - [table, old_column, new_column] - ) - else - cleanup_concurrent_column_type_change(table, old_column) - end - end - - # These methods are necessary so we can re-use the migration helpers in - # this class. - def connection - ActiveRecord::Base.connection - end - - def method_missing(name, *args, &block) - connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend - end - - def respond_to_missing?(*args) - connection.respond_to?(*args) || super - end - - def define_model_for(table) - Class.new(ActiveRecord::Base) do - self.table_name = table - end + def cleanup_concurrent_schema_change(table, old_column, new_column) + cleanup_concurrent_column_type_change(table, old_column) end end end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index c21bae5e16b..4fe5b4cc835 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -596,6 +596,97 @@ module Gitlab end end + # Renames a column using a background migration. + # + # Because this method uses a background migration it's more suitable for + # large tables. For small tables it's better to use + # `rename_column_concurrently` since it can complete its work in a much + # shorter amount of time and doesn't rely on Sidekiq. + # + # Example usage: + # + # rename_column_using_background_migration( + # :users, + # :feed_token, + # :rss_token + # ) + # + # table - The name of the database table containing the column. + # + # old - The old column name. + # + # new - The new column name. + # + # type - The type of the new column. If no type is given the old column's + # type is used. + # + # batch_size - The number of rows to schedule in a single background + # migration. + # + # interval - The time interval between every background migration. + def rename_column_using_background_migration( + table, + old_column, + new_column, + type: nil, + batch_size: 10_000, + interval: 10.minutes + ) + + check_trigger_permissions!(table) + + old_col = column_for(table, old_column) + new_type = type || old_col.type + max_index = 0 + + add_column(table, new_column, new_type, + limit: old_col.limit, + precision: old_col.precision, + scale: old_col.scale) + + # We set the default value _after_ adding the column so we don't end up + # updating any existing data with the default value. This isn't + # necessary since we copy over old values further down. + change_column_default(table, new_column, old_col.default) if old_col.default + + install_rename_triggers(table, old_column, new_column) + + model = Class.new(ActiveRecord::Base) do + self.table_name = table + + include ::EachBatch + end + + # Schedule the jobs that will copy the data from the old column to the + # new one. Rows with NULL values in our source column are skipped since + # the target column is already NULL at this point. + model.where.not(old_column => nil).each_batch(of: batch_size) do |batch, index| + start_id, end_id = batch.pluck('MIN(id), MAX(id)').first + max_index = index + + BackgroundMigrationWorker.perform_in( + index * interval, + 'CopyColumn', + [table, old_column, new_column, start_id, end_id] + ) + end + + # Schedule the renaming of the column to happen (initially) 1 hour after + # the last batch finished. + BackgroundMigrationWorker.perform_in( + (max_index * interval) + 1.hour, + 'CleanupConcurrentRename', + [table, old_column, new_column] + ) + + if perform_background_migration_inline? + # To ensure the schema is up to date immediately we perform the + # migration inline in dev / test environments. + Gitlab::BackgroundMigration.steal('CopyColumn') + Gitlab::BackgroundMigration.steal('CleanupConcurrentRename') + end + end + def perform_background_migration_inline? Rails.env.test? || Rails.env.development? end diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb index 280f799f2ab..eb7148ff108 100644 --- a/spec/lib/gitlab/database/migration_helpers_spec.rb +++ b/spec/lib/gitlab/database/migration_helpers_spec.rb @@ -1178,6 +1178,61 @@ describe Gitlab::Database::MigrationHelpers do end end + describe '#rename_column_using_background_migration' do + let!(:issue) { create(:issue, :closed, closed_at: Time.zone.now) } + + it 'renames a column using a background migration' do + expect(model) + .to receive(:add_column) + .with( + 'issues', + :closed_at_timestamp, + :datetime_with_timezone, + limit: anything, + precision: anything, + scale: anything + ) + + expect(model) + .to receive(:install_rename_triggers) + .with('issues', :closed_at, :closed_at_timestamp) + + expect(BackgroundMigrationWorker) + .to receive(:perform_in) + .ordered + .with( + 10.minutes, + 'CopyColumn', + ['issues', :closed_at, :closed_at_timestamp, issue.id, issue.id] + ) + + expect(BackgroundMigrationWorker) + .to receive(:perform_in) + .ordered + .with( + 1.hour + 10.minutes, + 'CleanupConcurrentRename', + ['issues', :closed_at, :closed_at_timestamp] + ) + + expect(Gitlab::BackgroundMigration) + .to receive(:steal) + .ordered + .with('CopyColumn') + + expect(Gitlab::BackgroundMigration) + .to receive(:steal) + .ordered + .with('CleanupConcurrentRename') + + model.rename_column_using_background_migration( + 'issues', + :closed_at, + :closed_at_timestamp + ) + end + end + describe '#perform_background_migration_inline?' do it 'returns true in a test environment' do allow(Rails.env) |