diff options
Diffstat (limited to 'lib/gitlab/database/migration_helpers.rb')
-rw-r--r-- | lib/gitlab/database/migration_helpers.rb | 160 |
1 files changed, 75 insertions, 85 deletions
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 31e733050e1..d06a73da8ac 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -4,6 +4,7 @@ module Gitlab module Database module MigrationHelpers include Migrations::BackgroundMigrationHelpers + include DynamicModelHelpers # https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS MAX_IDENTIFIER_NAME_LENGTH = 63 @@ -576,17 +577,7 @@ module Gitlab # old_column - The name of the old column. # new_column - The name of the new column. def install_rename_triggers(table, old_column, new_column) - trigger_name = rename_trigger_name(table, old_column, new_column) - quoted_table = quote_table_name(table) - quoted_old = quote_column_name(old_column) - quoted_new = quote_column_name(new_column) - - install_rename_triggers_for_postgresql( - trigger_name, - quoted_table, - quoted_old, - quoted_new - ) + install_rename_triggers_for_postgresql(table, old_column, new_column) end # Changes the type of a column concurrently. @@ -927,19 +918,67 @@ module Gitlab # This is crucial for Primary Key conversions, because setting a column # as the PK converts even check constraints to NOT NULL constraints # and forces an inline re-verification of the whole table. - # - It backfills the new column with the values of the existing primary key - # by scheduling background jobs. - # - It tracks the scheduled background jobs through the use of - # Gitlab::Database::BackgroundMigrationJob + # - It sets up a trigger to keep the two columns in sync. + # + # Note: this helper is intended to be used in a regular (pre-deployment) migration. + # + # This helper is part 1 of a multi-step migration process: + # 1. initialize_conversion_of_integer_to_bigint to create the new column and database triggers + # 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations + # 3. remaining steps TBD, see #288005 + # + # table - The name of the database table containing the column + # column - The name of the column that we want to convert to bigint. + # primary_key - The name of the primary key column (most often :id) + def initialize_conversion_of_integer_to_bigint(table, column, primary_key: :id) + unless table_exists?(table) + raise "Table #{table} does not exist" + end + + unless column_exists?(table, primary_key) + raise "Column #{primary_key} does not exist on #{table}" + end + + unless column_exists?(table, column) + raise "Column #{column} does not exist on #{table}" + end + + check_trigger_permissions!(table) + + old_column = column_for(table, column) + tmp_column = "#{column}_convert_to_bigint" + + with_lock_retries do + if (column.to_s == primary_key.to_s) || !old_column.null + # If the column to be converted is either a PK or is defined as NOT NULL, + # set it to `NOT NULL DEFAULT 0` and we'll copy paste the correct values bellow + # That way, we skip the expensive validation step required to add + # a NOT NULL constraint at the end of the process + add_column(table, tmp_column, :bigint, default: old_column.default || 0, null: false) + else + add_column(table, tmp_column, :bigint, default: old_column.default) + end + + install_rename_triggers(table, column, tmp_column) + end + end + + # Backfills the new column used in the conversion of an integer column to bigint using background migrations. + # + # - This helper should be called from a post-deployment migration. + # - In order for this helper to work properly, the new column must be first initialized with + # the `initialize_conversion_of_integer_to_bigint` helper. + # - It tracks the scheduled background jobs through Gitlab::Database::BackgroundMigration::BatchedMigration, # which allows a more thorough check that all jobs succeeded in the # cleanup migration and is way faster for very large tables. - # - It sets up a trigger to keep the two columns in sync - # - It does not schedule a cleanup job: we have to do that with followup - # post deployment migrations in the next release. # - # This needs to be done manually by using the - # `cleanup_initialize_conversion_of_integer_to_bigint` - # (not yet implemented - check #288005) + # Note: this helper is intended to be used in a post-deployment migration, to ensure any new code is + # deployed (including background job changes) before we begin processing the background migration. + # + # This helper is part 2 of a multi-step migration process: + # 1. initialize_conversion_of_integer_to_bigint to create the new column and database triggers + # 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations + # 3. remaining steps TBD, see #288005 # # table - The name of the database table containing the column # column - The name of the column that we want to convert to bigint. @@ -960,7 +999,7 @@ module Gitlab # and set the batch_size to 50_000 which will require # ~50s = (50000 / 200) * (0.1 + 0.1) to complete and leaves breathing space # between the scheduled jobs - def initialize_conversion_of_integer_to_bigint( + def backfill_conversion_of_integer_to_bigint( table, column, primary_key: :id, @@ -969,10 +1008,6 @@ module Gitlab interval: 2.minutes ) - if transaction_open? - raise 'initialize_conversion_of_integer_to_bigint can not be run inside a transaction' - end - unless table_exists?(table) raise "Table #{table} does not exist" end @@ -985,87 +1020,42 @@ module Gitlab raise "Column #{column} does not exist on #{table}" end - check_trigger_permissions!(table) - - old_column = column_for(table, column) tmp_column = "#{column}_convert_to_bigint" - with_lock_retries do - if (column.to_s == primary_key.to_s) || !old_column.null - # If the column to be converted is either a PK or is defined as NOT NULL, - # set it to `NOT NULL DEFAULT 0` and we'll copy paste the correct values bellow - # That way, we skip the expensive validation step required to add - # a NOT NULL constraint at the end of the process - add_column(table, tmp_column, :bigint, default: old_column.default || 0, null: false) - else - add_column(table, tmp_column, :bigint, default: old_column.default) - end - - install_rename_triggers(table, column, tmp_column) - end - - source_model = Class.new(ActiveRecord::Base) do - include EachBatch - - self.table_name = table - self.inheritance_column = :_type_disabled + unless column_exists?(table, tmp_column) + raise 'The temporary column does not exist, initialize it with `initialize_conversion_of_integer_to_bigint`' end - queue_background_migration_jobs_by_range_at_intervals( - source_model, + batched_migration = queue_batched_background_migration( 'CopyColumnUsingBackgroundMigrationJob', - interval, + table, + primary_key, + column, + tmp_column, + job_interval: interval, batch_size: batch_size, - other_job_arguments: [table, primary_key, sub_batch_size, column, tmp_column], - track_jobs: true, - primary_column_name: primary_key - ) + sub_batch_size: sub_batch_size) if perform_background_migration_inline? # To ensure the schema is up to date immediately we perform the # migration inline in dev / test environments. - Gitlab::BackgroundMigration.steal('CopyColumnUsingBackgroundMigrationJob') + Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.new.run_entire_migration(batched_migration) end end # Performs a concurrent column rename when using PostgreSQL. - def install_rename_triggers_for_postgresql(trigger, table, old, new) - execute <<-EOF.strip_heredoc - CREATE OR REPLACE FUNCTION #{trigger}() - RETURNS trigger AS - $BODY$ - BEGIN - NEW.#{new} := NEW.#{old}; - RETURN NEW; - END; - $BODY$ - LANGUAGE 'plpgsql' - VOLATILE - EOF - - execute <<-EOF.strip_heredoc - DROP TRIGGER IF EXISTS #{trigger} - ON #{table} - EOF - - execute <<-EOF.strip_heredoc - CREATE TRIGGER #{trigger} - BEFORE INSERT OR UPDATE - ON #{table} - FOR EACH ROW - EXECUTE FUNCTION #{trigger}() - EOF + def install_rename_triggers_for_postgresql(table, old, new, trigger_name: nil) + Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).create(old, new, trigger_name: trigger_name) end # Removes the triggers used for renaming a PostgreSQL column concurrently. def remove_rename_triggers_for_postgresql(table, trigger) - execute("DROP TRIGGER IF EXISTS #{trigger} ON #{table}") - execute("DROP FUNCTION IF EXISTS #{trigger}()") + Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).drop(trigger) end # Returns the (base) name to use for triggers when renaming columns. def rename_trigger_name(table, old, new) - 'trigger_' + Digest::SHA256.hexdigest("#{table}_#{old}_#{new}").first(12) + Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).name(old, new) end # Returns an Array containing the indexes for the given column |