summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean McGivern <sean@mcgivern.me.uk>2018-01-04 10:14:06 +0000
committerSean McGivern <sean@mcgivern.me.uk>2018-01-04 10:14:06 +0000
commitac409fb44402622dfd6abb076f7a85df4b27d39d (patch)
treee32e266d1ab620e70486926f31bafcc4e3e7ae88
parentb1e1990ee263bcae73f0e55526a55cff66103220 (diff)
parent78d22fb20db14c90861318b9f316466fbf002114 (diff)
downloadgitlab-ce-ac409fb44402622dfd6abb076f7a85df4b27d39d.tar.gz
Merge branch 'change-issues-closed-at-background-migration' into 'master'
Use a background migration for migrating issues.closed_at See merge request gitlab-org/gitlab-ce!16083
-rw-r--r--changelogs/unreleased/change-issues-closed-at-background-migration.yml5
-rw-r--r--db/post_migrate/20171221140220_schedule_issues_closed_at_type_change.rb45
-rw-r--r--db/schema.rb2
-rw-r--r--doc/development/what_requires_downtime.md57
-rw-r--r--lib/gitlab/background_migration/cleanup_concurrent_type_change.rb54
-rw-r--r--lib/gitlab/background_migration/copy_column.rb39
-rw-r--r--lib/gitlab/database/migration_helpers.rb121
-rw-r--r--spec/lib/gitlab/database/migration_helpers_spec.rb91
8 files changed, 402 insertions, 12 deletions
diff --git a/changelogs/unreleased/change-issues-closed-at-background-migration.yml b/changelogs/unreleased/change-issues-closed-at-background-migration.yml
new file mode 100644
index 00000000000..1c81c6a889e
--- /dev/null
+++ b/changelogs/unreleased/change-issues-closed-at-background-migration.yml
@@ -0,0 +1,5 @@
+---
+title: Use a background migration for issues.closed_at
+merge_request:
+author:
+type: other
diff --git a/db/post_migrate/20171221140220_schedule_issues_closed_at_type_change.rb b/db/post_migrate/20171221140220_schedule_issues_closed_at_type_change.rb
new file mode 100644
index 00000000000..be18c5866ae
--- /dev/null
+++ b/db/post_migrate/20171221140220_schedule_issues_closed_at_type_change.rb
@@ -0,0 +1,45 @@
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class ScheduleIssuesClosedAtTypeChange < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ disable_ddl_transaction!
+
+ class Issue < ActiveRecord::Base
+ self.table_name = 'issues'
+ include EachBatch
+
+ def self.to_migrate
+ where('closed_at IS NOT NULL')
+ end
+ end
+
+ def up
+ return unless migrate_column_type?
+
+ change_column_type_using_background_migration(
+ Issue.to_migrate,
+ :closed_at,
+ :datetime_with_timezone
+ )
+ end
+
+ def down
+ return if migrate_column_type?
+
+ change_column_type_using_background_migration(
+ Issue.to_migrate,
+ :closed_at,
+ :datetime
+ )
+ end
+
+ def migrate_column_type?
+ # Some environments may have already executed the previous version of this
+ # migration, thus we don't need to migrate those environments again.
+ column_for('issues', 'closed_at').type == :datetime
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 9d3b5db117e..925629f28fb 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 20171220191323) do
+ActiveRecord::Schema.define(version: 20171221140220) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
diff --git a/doc/development/what_requires_downtime.md b/doc/development/what_requires_downtime.md
index 05e0a64af18..9d0c62ecc35 100644
--- a/doc/development/what_requires_downtime.md
+++ b/doc/development/what_requires_downtime.md
@@ -195,6 +195,63 @@ end
And that's it, we're done!
+## Changing Column Types For Large Tables
+
+While `change_column_type_concurrently` can be used for changing the type of a
+column without downtime it doesn't work very well for large tables. Because all
+of the work happens in sequence the migration can take a very long time to
+complete, preventing a deployment from proceeding.
+`change_column_type_concurrently` can also produce a lot of pressure on the
+database due to it rapidly updating many rows in sequence.
+
+To reduce database pressure you should instead use
+`change_column_type_using_background_migration` when migrating a column in a
+large table (e.g. `issues`). This method works similar to
+`change_column_type_concurrently` but uses background migration to spread the
+work / load over a longer time period, without slowing down deployments.
+
+Usage of this method is fairly simple:
+
+```ruby
+class ExampleMigration < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ disable_ddl_transaction!
+
+ class Issue < ActiveRecord::Base
+ self.table_name = 'issues'
+
+ include EachBatch
+
+ def self.to_migrate
+ where('closed_at IS NOT NULL')
+ end
+ end
+
+ def up
+ change_column_type_using_background_migration(
+ Issue.to_migrate,
+ :closed_at,
+ :datetime_with_timezone
+ )
+ end
+
+ def down
+ change_column_type_using_background_migration(
+ Issue.to_migrate,
+ :closed_at,
+ :datetime
+ )
+ end
+end
+```
+
+This would change the type of `issues.closed_at` to `timestamp with time zone`.
+
+Keep in mind that the relation passed to
+`change_column_type_using_background_migration` _must_ include `EachBatch`,
+otherwise it will raise a `TypeError`.
+
## Adding Indexes
Adding indexes is an expensive process that blocks INSERT and UPDATE queries for
diff --git a/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb
new file mode 100644
index 00000000000..de622f657b2
--- /dev/null
+++ b/lib/gitlab/background_migration/cleanup_concurrent_type_change.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Background migration for cleaning up a concurrent column rename.
+ class CleanupConcurrentTypeChange
+ include Database::MigrationHelpers
+
+ RESCHEDULE_DELAY = 10.minutes
+
+ # table - The name of the table the migration is performed for.
+ # old_column - The name of the old (to drop) column.
+ # new_column - The name of the new column.
+ def perform(table, old_column, new_column)
+ return unless column_exists?(:issues, new_column)
+
+ rows_to_migrate = define_model_for(table)
+ .where(new_column => nil)
+ .where
+ .not(old_column => nil)
+
+ if rows_to_migrate.any?
+ BackgroundMigrationWorker.perform_in(
+ RESCHEDULE_DELAY,
+ 'CleanupConcurrentTypeChange',
+ [table, old_column, new_column]
+ )
+ else
+ cleanup_concurrent_column_type_change(table, old_column)
+ end
+ end
+
+ # These methods are necessary so we can re-use the migration helpers in
+ # this class.
+ def connection
+ ActiveRecord::Base.connection
+ end
+
+ def method_missing(name, *args, &block)
+ connection.__send__(name, *args, &block) # rubocop: disable GitlabSecurity/PublicSend
+ end
+
+ def respond_to_missing?(*args)
+ connection.respond_to?(*args) || super
+ end
+
+ def define_model_for(table)
+ Class.new(ActiveRecord::Base) do
+ self.table_name = table
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/copy_column.rb b/lib/gitlab/background_migration/copy_column.rb
new file mode 100644
index 00000000000..a2cb215c230
--- /dev/null
+++ b/lib/gitlab/background_migration/copy_column.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # CopyColumn is a simple (reusable) background migration that can be used to
+ # update the value of a column based on the value of another column in the
+ # same table.
+ #
+ # For this background migration to work the table that is migrated _has_ to
+ # have an `id` column as the primary key.
+ class CopyColumn
+ # table - The name of the table that contains the columns.
+ # copy_from - The column containing the data to copy.
+ # copy_to - The column to copy the data to.
+ # start_id - The start ID of the range of rows to update.
+ # end_id - The end ID of the range of rows to update.
+ def perform(table, copy_from, copy_to, start_id, end_id)
+ return unless connection.column_exists?(table, copy_to)
+
+ quoted_table = connection.quote_table_name(table)
+ quoted_copy_from = connection.quote_column_name(copy_from)
+ quoted_copy_to = connection.quote_column_name(copy_to)
+
+ # We're using raw SQL here since this job may be frequently executed. As
+ # a result dynamically defining models would lead to many unnecessary
+ # schema information queries.
+ connection.execute <<-SQL.strip_heredoc
+ UPDATE #{quoted_table}
+ SET #{quoted_copy_to} = #{quoted_copy_from}
+ WHERE id BETWEEN #{start_id} AND #{end_id}
+ SQL
+ end
+
+ def connection
+ ActiveRecord::Base.connection
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 3f65bc912de..33171f83692 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -385,10 +385,27 @@ module Gitlab
# necessary since we copy over old values further down.
change_column_default(table, new, old_col.default) if old_col.default
- trigger_name = rename_trigger_name(table, old, new)
+ install_rename_triggers(table, old, new)
+
+ update_column_in_batches(table, new, Arel::Table.new(table)[old])
+
+ change_column_null(table, new, false) unless old_col.null
+
+ copy_indexes(table, old, new)
+ copy_foreign_keys(table, old, new)
+ end
+
+ # Installs triggers in a table that keep a new column in sync with an old
+ # one.
+ #
+ # table - The name of the table to install the trigger in.
+ # old_column - The name of the old column.
+ # new_column - The name of the new column.
+ def install_rename_triggers(table, old_column, new_column)
+ trigger_name = rename_trigger_name(table, old_column, new_column)
quoted_table = quote_table_name(table)
- quoted_old = quote_column_name(old)
- quoted_new = quote_column_name(new)
+ quoted_old = quote_column_name(old_column)
+ quoted_new = quote_column_name(new_column)
if Database.postgresql?
install_rename_triggers_for_postgresql(trigger_name, quoted_table,
@@ -397,13 +414,6 @@ module Gitlab
install_rename_triggers_for_mysql(trigger_name, quoted_table,
quoted_old, quoted_new)
end
-
- update_column_in_batches(table, new, Arel::Table.new(table)[old])
-
- change_column_null(table, new, false) unless old_col.null
-
- copy_indexes(table, old, new)
- copy_foreign_keys(table, old, new)
end
# Changes the type of a column concurrently.
@@ -455,6 +465,97 @@ module Gitlab
remove_column(table, old)
end
+ # Changes the column type of a table using a background migration.
+ #
+ # Because this method uses a background migration it's more suitable for
+ # large tables. For small tables it's better to use
+ # `change_column_type_concurrently` since it can complete its work in a
+ # much shorter amount of time and doesn't rely on Sidekiq.
+ #
+ # Example usage:
+ #
+ # class Issue < ActiveRecord::Base
+ # self.table_name = 'issues'
+ #
+ # include EachBatch
+ #
+ # def self.to_migrate
+ # where('closed_at IS NOT NULL')
+ # end
+ # end
+ #
+ # change_column_type_using_background_migration(
+ # Issue.to_migrate,
+ # :closed_at,
+ # :datetime_with_timezone
+ # )
+ #
+ # Reverting a migration like this is done exactly the same way, just with
+ # a different type to migrate to (e.g. `:datetime` in the above example).
+ #
+ # relation - An ActiveRecord relation to use for scheduling jobs and
+ # figuring out what table we're modifying. This relation _must_
+ # have the EachBatch module included.
+ #
+ # column - The name of the column for which the type will be changed.
+ #
+ # new_type - The new type of the column.
+ #
+ # batch_size - The number of rows to schedule in a single background
+ # migration.
+ #
+ # interval - The time interval between every background migration.
+ def change_column_type_using_background_migration(
+ relation,
+ column,
+ new_type,
+ batch_size: 10_000,
+ interval: 10.minutes
+ )
+ unless relation.model < EachBatch
+ raise TypeError, 'The relation must include the EachBatch module'
+ end
+
+ temp_column = "#{column}_for_type_change"
+ table = relation.table_name
+ max_index = 0
+
+ add_column(table, temp_column, new_type)
+ install_rename_triggers(table, column, temp_column)
+
+ # Schedule the jobs that will copy the data from the old column to the
+ # new one.
+ relation.each_batch(of: batch_size) do |batch, index|
+ start_id, end_id = batch.pluck('MIN(id), MAX(id)').first
+ max_index = index
+
+ BackgroundMigrationWorker.perform_in(
+ index * interval,
+ 'CopyColumn',
+ [table, column, temp_column, start_id, end_id]
+ )
+ end
+
+ # Schedule the renaming of the column to happen (initially) 1 hour after
+ # the last batch finished.
+ BackgroundMigrationWorker.perform_in(
+ (max_index * interval) + 1.hour,
+ 'CleanupConcurrentTypeChange',
+ [table, column, temp_column]
+ )
+
+ if perform_background_migration_inline?
+ # To ensure the schema is up to date immediately we perform the
+ # migration inline in dev / test environments.
+ Gitlab::BackgroundMigration.steal('CopyColumn')
+ Gitlab::BackgroundMigration.steal('CleanupConcurrentTypeChange')
+ end
+ end
+
+ def perform_background_migration_inline?
+ Rails.env.test? || Rails.env.development?
+ end
+
# Performs a concurrent column rename when using PostgreSQL.
def install_rename_triggers_for_postgresql(trigger, table, old, new)
execute <<-EOF.strip_heredoc
diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb
index 664ba0f7234..7727a1d81b1 100644
--- a/spec/lib/gitlab/database/migration_helpers_spec.rb
+++ b/spec/lib/gitlab/database/migration_helpers_spec.rb
@@ -902,7 +902,7 @@ describe Gitlab::Database::MigrationHelpers do
describe '#check_trigger_permissions!' do
it 'does nothing when the user has the correct permissions' do
expect { model.check_trigger_permissions!('users') }
- .not_to raise_error(RuntimeError)
+ .not_to raise_error
end
it 'raises RuntimeError when the user does not have the correct permissions' do
@@ -1036,4 +1036,93 @@ describe Gitlab::Database::MigrationHelpers do
end
end
end
+
+ describe '#change_column_type_using_background_migration' do
+ let!(:issue) { create(:issue) }
+
+ let(:issue_model) do
+ Class.new(ActiveRecord::Base) do
+ self.table_name = 'issues'
+ include EachBatch
+ end
+ end
+
+ it 'changes the type of a column using a background migration' do
+ expect(model)
+ .to receive(:add_column)
+ .with('issues', 'closed_at_for_type_change', :datetime_with_timezone)
+
+ expect(model)
+ .to receive(:install_rename_triggers)
+ .with('issues', :closed_at, 'closed_at_for_type_change')
+
+ expect(BackgroundMigrationWorker)
+ .to receive(:perform_in)
+ .ordered
+ .with(
+ 10.minutes,
+ 'CopyColumn',
+ ['issues', :closed_at, 'closed_at_for_type_change', issue.id, issue.id]
+ )
+
+ expect(BackgroundMigrationWorker)
+ .to receive(:perform_in)
+ .ordered
+ .with(
+ 1.hour + 10.minutes,
+ 'CleanupConcurrentTypeChange',
+ ['issues', :closed_at, 'closed_at_for_type_change']
+ )
+
+ expect(Gitlab::BackgroundMigration)
+ .to receive(:steal)
+ .ordered
+ .with('CopyColumn')
+
+ expect(Gitlab::BackgroundMigration)
+ .to receive(:steal)
+ .ordered
+ .with('CleanupConcurrentTypeChange')
+
+ model.change_column_type_using_background_migration(
+ issue_model.all,
+ :closed_at,
+ :datetime_with_timezone
+ )
+ end
+ end
+
+ describe '#perform_background_migration_inline?' do
+ it 'returns true in a test environment' do
+ allow(Rails.env)
+ .to receive(:test?)
+ .and_return(true)
+
+ expect(model.perform_background_migration_inline?).to eq(true)
+ end
+
+ it 'returns true in a development environment' do
+ allow(Rails.env)
+ .to receive(:test?)
+ .and_return(false)
+
+ allow(Rails.env)
+ .to receive(:development?)
+ .and_return(true)
+
+ expect(model.perform_background_migration_inline?).to eq(true)
+ end
+
+ it 'returns false in a production environment' do
+ allow(Rails.env)
+ .to receive(:test?)
+ .and_return(false)
+
+ allow(Rails.env)
+ .to receive(:development?)
+ .and_return(false)
+
+ expect(model.perform_background_migration_inline?).to eq(false)
+ end
+ end
end