diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2020-09-19 01:45:44 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2020-09-19 01:45:44 +0000 |
commit | 85dc423f7090da0a52c73eb66faf22ddb20efff9 (patch) | |
tree | 9160f299afd8c80c038f08e1545be119f5e3f1e1 /lib/gitlab/database/partitioning_migration_helpers | |
parent | 15c2c8c66dbe422588e5411eee7e68f1fa440bb8 (diff) | |
download | gitlab-ce-85dc423f7090da0a52c73eb66faf22ddb20efff9.tar.gz |
Add latest changes from gitlab-org/gitlab@13-4-stable-ee
Diffstat (limited to 'lib/gitlab/database/partitioning_migration_helpers')
-rw-r--r-- | lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb | 4 | ||||
-rw-r--r-- | lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb | 85 |
2 files changed, 73 insertions, 16 deletions
diff --git a/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb index 1fb9476b7d9..2def3a4d3a9 100644 --- a/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb +++ b/lib/gitlab/database/partitioning_migration_helpers/foreign_key_helpers.rb @@ -31,7 +31,7 @@ module Gitlab current_keys << specified_key else - Rails.logger.warn "foreign key not added because it already exists: #{specified_key}" # rubocop:disable Gitlab/RailsLogger + Gitlab::AppLogger.warn "foreign key not added because it already exists: #{specified_key}" current_keys end end @@ -56,7 +56,7 @@ module Gitlab existing_key.delete current_keys.delete(existing_key) else - Rails.logger.warn "foreign key not removed because it doesn't exist: #{specified_key}" # rubocop:disable Gitlab/RailsLogger + Gitlab::AppLogger.warn "foreign key not removed because it doesn't exist: #{specified_key}" end current_keys diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb index 84b6fb9f76e..f7b0306b769 100644 --- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb +++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb @@ -6,6 +6,7 @@ module Gitlab module TableManagementHelpers include ::Gitlab::Database::SchemaHelpers include ::Gitlab::Database::DynamicModelHelpers + include ::Gitlab::Database::MigrationHelpers include ::Gitlab::Database::Migrations::BackgroundMigrationHelpers ALLOWED_TABLES = %w[audit_events].freeze @@ -15,6 +16,12 @@ module Gitlab BATCH_INTERVAL = 2.minutes.freeze BATCH_SIZE = 50_000 + JobArguments = Struct.new(:start_id, :stop_id, :source_table_name, :partitioned_table_name, :source_column) do + def self.from_array(arguments) + self.new(*arguments) + end + end + # Creates a partitioned copy of an existing table, using a RANGE partitioning strategy on a timestamp column. # One partition is created per month between the given `min_date` and `max_date`. Also installs a trigger on # the original table to copy writes into the partitioned table. To copy over historic data from before creation @@ -134,6 +141,42 @@ module Gitlab end end + # Executes cleanup tasks from a previous BackgroundMigration to backfill a partitioned table by finishing + # pending jobs and performing a final data synchronization. + # This performs two steps: + # 1. Wait to finish any pending BackgroundMigration jobs that have not succeeded + # 2. Inline copy any missed rows from the original table to the partitioned table + # + # **NOTE** Migrations using this method cannot be scheduled in the same release as the migration that + # schedules the background migration using the `enqueue_background_migration` helper, or else the + # background migration jobs will be force-executed. + # + # Example: + # + # finalize_backfilling_partitioned_table :audit_events + # + def finalize_backfilling_partitioned_table(table_name) + assert_table_is_allowed(table_name) + assert_not_in_transaction_block(scope: ERROR_SCOPE) + + partitioned_table_name = make_partitioned_table_name(table_name) + unless table_exists?(partitioned_table_name) + raise "could not find partitioned table for #{table_name}, " \ + "this could indicate the previous partitioning migration has been rolled back." + end + + Gitlab::BackgroundMigration.steal(MIGRATION_CLASS_NAME) do |raw_arguments| + JobArguments.from_array(raw_arguments).source_table_name == table_name.to_s + end + + primary_key = connection.primary_key(table_name) + copy_missed_records(table_name, partitioned_table_name, primary_key) + + disable_statement_timeout do + execute("VACUUM FREEZE ANALYZE #{partitioned_table_name}") + end + end + private def assert_table_is_allowed(table_name) @@ -161,10 +204,8 @@ module Gitlab def create_range_partitioned_copy(source_table_name, partitioned_table_name, partition_column, primary_key) if table_exists?(partitioned_table_name) - # rubocop:disable Gitlab/RailsLogger - Rails.logger.warn "Partitioned table not created because it already exists" \ + Gitlab::AppLogger.warn "Partitioned table not created because it already exists" \ " (this may be due to an aborted migration or similar): table_name: #{partitioned_table_name} " - # rubocop:enable Gitlab/RailsLogger return end @@ -217,10 +258,8 @@ module Gitlab def create_range_partition_safely(partition_name, table_name, lower_bound, upper_bound) if table_exists?(table_for_range_partition(partition_name)) - # rubocop:disable Gitlab/RailsLogger - Rails.logger.warn "Partition not created because it already exists" \ + Gitlab::AppLogger.warn "Partition not created because it already exists" \ " (this may be due to an aborted migration or similar): partition_name: #{partition_name}" - # rubocop:enable Gitlab/RailsLogger return end @@ -241,10 +280,8 @@ module Gitlab def create_sync_function(name, partitioned_table_name, unique_key) if function_exists?(name) - # rubocop:disable Gitlab/RailsLogger - Rails.logger.warn "Partitioning sync function not created because it already exists" \ + Gitlab::AppLogger.warn "Partitioning sync function not created because it already exists" \ " (this may be due to an aborted migration or similar): function name: #{name}" - # rubocop:enable Gitlab/RailsLogger return end @@ -276,17 +313,15 @@ module Gitlab def create_sync_trigger(table_name, trigger_name, function_name) if trigger_exists?(table_name, trigger_name) - # rubocop:disable Gitlab/RailsLogger - Rails.logger.warn "Partitioning sync trigger not created because it already exists" \ + Gitlab::AppLogger.warn "Partitioning sync trigger not created because it already exists" \ " (this may be due to an aborted migration or similar): trigger name: #{trigger_name}" - # rubocop:enable Gitlab/RailsLogger return end create_trigger(table_name, trigger_name, function_name, fires: 'AFTER INSERT OR UPDATE OR DELETE') end - def enqueue_background_migration(source_table_name, partitioned_table_name, source_key) + def enqueue_background_migration(source_table_name, partitioned_table_name, source_column) source_model = define_batchable_model(source_table_name) queue_background_migration_jobs_by_range_at_intervals( @@ -294,13 +329,35 @@ module Gitlab MIGRATION_CLASS_NAME, BATCH_INTERVAL, batch_size: BATCH_SIZE, - other_job_arguments: [source_table_name.to_s, partitioned_table_name, source_key], + other_job_arguments: [source_table_name.to_s, partitioned_table_name, source_column], track_jobs: true) end def cleanup_migration_jobs(table_name) ::Gitlab::Database::BackgroundMigrationJob.for_partitioning_migration(MIGRATION_CLASS_NAME, table_name).delete_all end + + def copy_missed_records(source_table_name, partitioned_table_name, source_column) + backfill_table = BackfillPartitionedTable.new + relation = ::Gitlab::Database::BackgroundMigrationJob.pending + .for_partitioning_migration(MIGRATION_CLASS_NAME, source_table_name) + + relation.each_batch do |batch| + batch.each do |pending_migration_job| + job_arguments = JobArguments.from_array(pending_migration_job.arguments) + start_id = job_arguments.start_id + stop_id = job_arguments.stop_id + + say("Backfilling data into partitioned table for ids from #{start_id} to #{stop_id}") + job_updated_count = backfill_table.perform(start_id, stop_id, source_table_name, + partitioned_table_name, source_column) + + unless job_updated_count > 0 + raise "failed to update tracking record for ids from #{start_id} to #{stop_id}" + end + end + end + end end end end |