Diffstat (limited to 'lib/gitlab/database')
24 files changed, 545 insertions, 115 deletions
diff --git a/lib/gitlab/database/as_with_materialized.rb b/lib/gitlab/database/as_with_materialized.rb
index 7c45f416638..e7e3c1766a9 100644
--- a/lib/gitlab/database/as_with_materialized.rb
+++ b/lib/gitlab/database/as_with_materialized.rb
@@ -3,19 +3,15 @@
 module Gitlab
   module Database
     # This class is a special Arel node which allows optionally defining the `MATERIALIZED` keyword for CTE and Recursive CTE queries.
-    class AsWithMaterialized < Arel::Nodes::Binary
+    class AsWithMaterialized < Arel::Nodes::As
       extend Gitlab::Utils::StrongMemoize

-      MATERIALIZED = Arel.sql(' MATERIALIZED')
-      EMPTY_STRING = Arel.sql('')
-      attr_reader :expr
+      MATERIALIZED = 'MATERIALIZED '

       def initialize(left, right, materialized: true)
-        @expr = if materialized && self.class.materialized_supported?
-          MATERIALIZED
-        else
-          EMPTY_STRING
-        end
+        if materialized && self.class.materialized_supported?
+          right.prepend(MATERIALIZED)
+        end

         super(left, right)
       end
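A minimal usage sketch (assumed; not part of the diff). Since the node now subclasses Arel::Nodes::As, Arel renders it like a plain AS clause; the definition must be a mutable SQL literal, because the keyword is prepended in place:

  # Hypothetical example - the CTE name and query are made up.
  cte_table = Arel::Table.new(:my_cte)
  definition = Arel::Nodes::SqlLiteral.new('(SELECT 1 AS value)')

  node = Gitlab::Database::AsWithMaterialized.new(cte_table, definition)
  # Rendered inside a WITH clause on PostgreSQL 12+:
  #   my_cte AS MATERIALIZED (SELECT 1 AS value)
  # On older versions materialized_supported? is false and the keyword is omitted.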
diff --git a/lib/gitlab/database/background_migration/batch_optimizer.rb b/lib/gitlab/database/background_migration/batch_optimizer.rb
new file mode 100644
index 00000000000..0668490dda8
--- /dev/null
+++ b/lib/gitlab/database/background_migration/batch_optimizer.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    module BackgroundMigration
+      # This is an optimizer for throughput of batched migration jobs
+      #
+      # The underlying mechanic is based on the concept of time efficiency:
+      #   time efficiency = job duration / interval
+      # Ideally, this is close to, but lower than, 1 - so we're using time efficiently.
+      #
+      # We aim to land in the 90%-95% range, which gives the database a little breathing room
+      # in between.
+      #
+      # The optimizer is based on calculating the exponential moving average of time efficiencies
+      # for the last N jobs. If we're outside the range, we scale the batch size by the ratio of
+      # target to current efficiency, capping any increase at 20%.
+      class BatchOptimizer
+        # Target time efficiency for a job
+        # Time efficiency is defined as: job duration / interval
+        TARGET_EFFICIENCY = (0.9..0.95).freeze
+
+        # Lower and upper bound for the batch size
+        ALLOWED_BATCH_SIZE = (1_000..2_000_000).freeze
+
+        # Limit for the multiplier of the batch size
+        MAX_MULTIPLIER = 1.2
+
+        # When smoothing time efficiency, use this many jobs
+        NUMBER_OF_JOBS = 20
+
+        # Smoothing factor for exponential moving average
+        EMA_ALPHA = 0.4
+
+        attr_reader :migration, :number_of_jobs, :ema_alpha
+
+        def initialize(migration, number_of_jobs: NUMBER_OF_JOBS, ema_alpha: EMA_ALPHA)
+          @migration = migration
+          @number_of_jobs = number_of_jobs
+          @ema_alpha = ema_alpha
+        end
+
+        def optimize!
+          return unless Feature.enabled?(:optimize_batched_migrations, type: :ops, default_enabled: :yaml)
+
+          if multiplier = batch_size_multiplier
+            migration.batch_size = (migration.batch_size * multiplier).to_i.clamp(ALLOWED_BATCH_SIZE)
+            migration.save!
+          end
+        end
+
+        private
+
+        def batch_size_multiplier
+          efficiency = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs, alpha: ema_alpha)
+
+          return if efficiency.nil? || efficiency == 0
+
+          # We hit the range - no change
+          return if TARGET_EFFICIENCY.include?(efficiency)
+
+          # Assumption: time efficiency is linear in the batch size
+          [TARGET_EFFICIENCY.max / efficiency, MAX_MULTIPLIER].min
+        end
+      end
+    end
+  end
+end
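To make the scaling rule concrete, a worked sketch applying the constants above by hand (the efficiency values are made up):

  target = 0.9..0.95
  max_multiplier = 1.2

  # Jobs finish far too quickly (efficiency 0.5): grow, capped at +20%.
  [target.max / 0.5, max_multiplier].min   # => 1.2

  # Jobs overrun the interval (efficiency 1.5): shrink towards the target.
  [target.max / 1.5, max_multiplier].min   # => ~0.63

  # Efficiency already in range: batch_size_multiplier returns nil, no change.
  target.include?(0.93)                    # => true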
diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb
index 3b624df2bfd..869b97b8ac0 100644
--- a/lib/gitlab/database/background_migration/batched_job.rb
+++ b/lib/gitlab/database/background_migration/batched_job.rb
@@ -4,10 +4,23 @@ module Gitlab
   module Database
     module BackgroundMigration
       class BatchedJob < ActiveRecord::Base # rubocop:disable Rails/ApplicationRecord
+        include FromUnion
+
         self.table_name = :batched_background_migration_jobs

+        MAX_ATTEMPTS = 3
+        STUCK_JOBS_TIMEOUT = 1.hour.freeze
+
         belongs_to :batched_migration, foreign_key: :batched_background_migration_id

+        scope :active, -> { where(status: [:pending, :running]) }
+        scope :stuck, -> { active.where('updated_at <= ?', STUCK_JOBS_TIMEOUT.ago) }
+        scope :retriable, -> {
+          failed_jobs = where(status: :failed).where('attempts < ?', MAX_ATTEMPTS)
+
+          from_union([failed_jobs, self.stuck])
+        }
+
         enum status: {
           pending: 0,
           running: 1,
@@ -15,8 +28,22 @@ module Gitlab
           succeeded: 3
         }

+        scope :successful_in_execution_order, -> { where.not(finished_at: nil).succeeded.order(:finished_at) }
+
         delegate :aborted?, :job_class, :table_name, :column_name, :job_arguments,
           to: :batched_migration, prefix: :migration

+        attribute :pause_ms, :integer, default: 100
+
+        def time_efficiency
+          return unless succeeded?
+          return unless finished_at && started_at
+
+          duration = finished_at - started_at
+
+          # TODO: Switch to individual job interval (prereq: https://gitlab.com/gitlab-org/gitlab/-/issues/328801)
+          duration.to_f / batched_migration.interval
+        end
       end
     end
   end
diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb
index 4aa33ed7946..e85162f355e 100644
--- a/lib/gitlab/database/background_migration/batched_migration.rb
+++ b/lib/gitlab/database/background_migration/batched_migration.rb
@@ -20,9 +20,12 @@ module Gitlab
         paused: 0,
         active: 1,
         aborted: 2,
-        finished: 3
+        finished: 3,
+        failed: 4
       }

+      attribute :pause_ms, :integer, default: 100
+
      def self.active_migration
        active.queue_order.first
      end
@@ -35,7 +38,13 @@ module Gitlab
      end

      def create_batched_job!(min, max)
-       batched_jobs.create!(min_value: min, max_value: max, batch_size: batch_size, sub_batch_size: sub_batch_size)
+       batched_jobs.create!(
+         min_value: min,
+         max_value: max,
+         batch_size: batch_size,
+         sub_batch_size: sub_batch_size,
+         pause_ms: pause_ms
+       )
      end

      def next_min_value
@@ -58,12 +67,40 @@ module Gitlab
        write_attribute(:batch_class_name, class_name.demodulize)
      end

+     def migrated_tuple_count
+       batched_jobs.succeeded.sum(:batch_size)
+     end
+
      def prometheus_labels
        @prometheus_labels ||= {
          migration_id: id,
          migration_identifier: "%s/%s.%s" % [job_class_name, table_name, column_name]
        }
      end
+
+     def smoothed_time_efficiency(number_of_jobs: 10, alpha: 0.2)
+       jobs = batched_jobs.successful_in_execution_order.reverse_order.limit(number_of_jobs)
+
+       return if jobs.size < number_of_jobs
+
+       efficiencies = jobs.map(&:time_efficiency).reject(&:nil?).each_with_index
+
+       dividend = efficiencies.reduce(0) do |total, (job_eff, i)|
+         total + job_eff * (1 - alpha)**i
+       end
+
+       divisor = efficiencies.reduce(0) do |total, (job_eff, i)|
+         total + (1 - alpha)**i
+       end
+
+       return if divisor == 0
+
+       (dividend / divisor).round(2)
+     end
+
+     def optimize!
+       BatchOptimizer.new(self).optimize!
+     end
    end
  end
end
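The dividend/divisor pair above is a discounted weighted average: the most recent job has weight 1 and the i-th most recent has weight (1 - alpha)**i. A worked sketch with made-up efficiencies (most recent first):

  alpha = 0.4
  efficiencies = [0.9, 0.8, 1.2].each_with_index

  dividend = efficiencies.reduce(0) { |total, (eff, i)| total + eff * (1 - alpha)**i }
  divisor = efficiencies.reduce(0) { |total, (_eff, i)| total + (1 - alpha)**i }

  (dividend / divisor).round(2)
  # => (0.9 * 1.0 + 0.8 * 0.6 + 1.2 * 0.36) / (1.0 + 0.6 + 0.36)
  # => 1.812 / 1.96 ≈ 0.92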
diff --git a/lib/gitlab/database/background_migration/batched_migration_runner.rb b/lib/gitlab/database/background_migration/batched_migration_runner.rb
index cf8b61f5feb..67fe6c536e6 100644
--- a/lib/gitlab/database/background_migration/batched_migration_runner.rb
+++ b/lib/gitlab/database/background_migration/batched_migration_runner.rb
@@ -19,8 +19,10 @@ module Gitlab
      #
      # Note that this method is primarily intended to be called by a scheduled worker.
      def run_migration_job(active_migration)
-       if next_batched_job = create_next_batched_job!(active_migration)
+       if next_batched_job = find_or_create_next_batched_job(active_migration)
          migration_wrapper.perform(next_batched_job)
+
+         active_migration.optimize!
        else
          finish_active_migration(active_migration)
        end
@@ -46,12 +48,12 @@ module Gitlab

      attr_reader :migration_wrapper

-     def create_next_batched_job!(active_migration)
-       next_batch_range = find_next_batch_range(active_migration)
-
-       return if next_batch_range.nil?
-
-       active_migration.create_batched_job!(next_batch_range.min, next_batch_range.max)
+     def find_or_create_next_batched_job(active_migration)
+       if next_batch_range = find_next_batch_range(active_migration)
+         active_migration.create_batched_job!(next_batch_range.min, next_batch_range.max)
+       else
+         active_migration.batched_jobs.retriable.first
+       end
      end

      def find_next_batch_range(active_migration)
@@ -80,7 +82,13 @@ module Gitlab
      end

      def finish_active_migration(active_migration)
-       active_migration.finished!
+       return if active_migration.batched_jobs.active.exists?
+
+       if active_migration.batched_jobs.failed.exists?
+         active_migration.failed!
+       else
+         active_migration.finished!
+       end
      end
    end
  end
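With this change the runner reuses retriable jobs before finishing a migration, and failed jobs now flip the migration into the new failed state. A hypothetical tick of the scheduled worker (the runner's initializer signature is assumed; it is not shown in these hunks):

  runner = Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.new(
    Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper.new
  )

  if (migration = Gitlab::Database::BackgroundMigration::BatchedMigration.active_migration)
    runner.run_migration_job(migration) # perform the next (or retriable) job, then optimize!
  end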
diff --git a/lib/gitlab/database/background_migration/batched_migration_wrapper.rb b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb
index c276f8ce75b..e37df102872 100644
--- a/lib/gitlab/database/background_migration/batched_migration_wrapper.rb
+++ b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb
@@ -19,10 +19,10 @@ module Gitlab
        execute_batch(batch_tracking_record)

        batch_tracking_record.status = :succeeded
-     rescue => e
+     rescue Exception # rubocop:disable Lint/RescueException
        batch_tracking_record.status = :failed

-       raise e
+       raise
      ensure
        finish_tracking_execution(batch_tracking_record)
        track_prometheus_metrics(batch_tracking_record)
@@ -31,7 +31,7 @@ module Gitlab
      private

      def start_tracking_execution(tracking_record)
-       tracking_record.update!(attempts: tracking_record.attempts + 1, status: :running, started_at: Time.current)
+       tracking_record.update!(attempts: tracking_record.attempts + 1, status: :running, started_at: Time.current, finished_at: nil, metrics: {})
      end

      def execute_batch(tracking_record)
@@ -43,6 +43,7 @@ module Gitlab
          tracking_record.migration_table_name,
          tracking_record.migration_column_name,
          tracking_record.sub_batch_size,
+         tracking_record.pause_ms,
          *tracking_record.migration_job_arguments)

        if job_instance.respond_to?(:batch_metrics)
@@ -61,11 +62,12 @@ module Gitlab
        metric_for(:gauge_batch_size).set(base_labels, tracking_record.batch_size)
        metric_for(:gauge_sub_batch_size).set(base_labels, tracking_record.sub_batch_size)
+       metric_for(:gauge_interval).set(base_labels, tracking_record.batched_migration.interval)
+       metric_for(:gauge_job_duration).set(base_labels, (tracking_record.finished_at - tracking_record.started_at).to_i)
        metric_for(:counter_updated_tuples).increment(base_labels, tracking_record.batch_size)
-
-       # Time efficiency: Ratio of duration to interval (ideal: less than, but close to 1)
-       efficiency = (tracking_record.finished_at - tracking_record.started_at).to_i / migration.interval.to_f
-       metric_for(:histogram_time_efficiency).observe(base_labels, efficiency)
+       metric_for(:gauge_migrated_tuples).set(base_labels, tracking_record.batched_migration.migrated_tuple_count)
+       metric_for(:gauge_total_tuple_count).set(base_labels, tracking_record.batched_migration.total_tuple_count)
+       metric_for(:gauge_last_update_time).set(base_labels, Time.current.to_i)

        if metrics = tracking_record.metrics
          metrics['timings']&.each do |key, timings|
@@ -94,21 +96,35 @@ module Gitlab
            :batched_migration_job_sub_batch_size,
            'Sub-batch size for a batched migration job'
          ),
+         gauge_interval: Gitlab::Metrics.gauge(
+           :batched_migration_job_interval_seconds,
+           'Interval for a batched migration job'
+         ),
+         gauge_job_duration: Gitlab::Metrics.gauge(
+           :batched_migration_job_duration_seconds,
+           'Duration for a batched migration job'
+         ),
          counter_updated_tuples: Gitlab::Metrics.counter(
            :batched_migration_job_updated_tuples_total,
            'Number of tuples updated by batched migration job'
          ),
+         gauge_migrated_tuples: Gitlab::Metrics.gauge(
+           :batched_migration_migrated_tuples_total,
+           'Total number of tuples migrated by a batched migration'
+         ),
          histogram_timings: Gitlab::Metrics.histogram(
-           :batched_migration_job_duration_seconds,
-           'Timings for a batched migration job',
+           :batched_migration_job_query_duration_seconds,
+           'Query timings for a batched migration job',
            {},
            [0.1, 0.25, 0.5, 1, 5].freeze
          ),
-         histogram_time_efficiency: Gitlab::Metrics.histogram(
-           :batched_migration_job_time_efficiency,
-           'Ratio of job duration to interval',
-           {},
-           [0.5, 0.9, 1, 1.5, 2].freeze
+         gauge_total_tuple_count: Gitlab::Metrics.gauge(
+           :batched_migration_total_tuple_count,
+           'Total tuple count the migration needs to touch'
+         ),
+         gauge_last_update_time: Gitlab::Metrics.gauge(
+           :batched_migration_last_update_time_seconds,
+           'Unix epoch time in seconds'
          )
        }
      end
diff --git a/lib/gitlab/database/background_migration_job.rb b/lib/gitlab/database/background_migration_job.rb
index 1b9d7cbc9a1..1121793917b 100644
--- a/lib/gitlab/database/background_migration_job.rb
+++ b/lib/gitlab/database/background_migration_job.rb
@@ -9,7 +9,7 @@ module Gitlab
      scope :for_migration_class, -> (class_name) { where(class_name: normalize_class_name(class_name)) }

      scope :for_migration_execution, -> (class_name, arguments) do
-       for_migration_class(class_name).where('arguments = ?', arguments.to_json)
+       for_migration_class(class_name).where('arguments = ?', arguments.to_json) # rubocop:disable Rails/WhereEquals
      end

      scope :for_partitioning_migration, -> (class_name, table_name) do
diff --git a/lib/gitlab/database/consistency.rb b/lib/gitlab/database/consistency.rb
index b7d06a26ddb..e99ea7a3232 100644
--- a/lib/gitlab/database/consistency.rb
+++ b/lib/gitlab/database/consistency.rb
@@ -28,4 +28,4 @@ module Gitlab
   end
 end

-::Gitlab::Database::Consistency.singleton_class.prepend_if_ee('EE::Gitlab::Database::Consistency')
+::Gitlab::Database::Consistency.singleton_class.prepend_mod_with('Gitlab::Database::Consistency')
diff --git a/lib/gitlab/database/loose_index_scan_distinct_count.rb b/lib/gitlab/database/loose_index_scan_distinct_count.rb
index 884f4d47ff8..26be07f91c4 100644
--- a/lib/gitlab/database/loose_index_scan_distinct_count.rb
+++ b/lib/gitlab/database/loose_index_scan_distinct_count.rb
@@ -11,7 +11,7 @@ module Gitlab
    # This query will read each element in the index matching the project_id filter.
    # If a project_id has 100_000 issues, all 100_000 elements will be read.
    #
-   # A loose index scan will read only one entry from the index for each project_id to reduce the number of disk reads.
+   # A loose index scan will only read one entry from the index for each project_id to reduce the number of disk reads.
    #
    # Usage:
    #
@@ -94,7 +94,7 @@ module Gitlab
        elsif column.is_a?(Arel::Attributes::Attribute)
          column
        else
-         raise ColumnConfigurationError.new("Cannot transform the column: #{column.inspect}, please provide the column name as string")
+         raise ColumnConfigurationError, "Cannot transform the column: #{column.inspect}, please provide the column name as string"
        end
      end
    end
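The class's own usage comment is elided from this hunk. As a sketch of the technique itself (plain SQL, not this class's API), a loose index scan over issues.project_id can be emulated with a recursive CTE that reads one index entry per distinct value:

  # Illustrative only; table and column names are assumptions.
  sql = <<~SQL
    WITH RECURSIVE distinct_values AS (
      (SELECT project_id FROM issues ORDER BY project_id LIMIT 1)
      UNION ALL
      SELECT (SELECT project_id
              FROM issues
              WHERE project_id > dv.project_id
              ORDER BY project_id
              LIMIT 1) AS project_id
      FROM distinct_values dv
      WHERE dv.project_id IS NOT NULL
    )
    SELECT COUNT(project_id) AS distinct_count FROM distinct_values
  SQL

  ActiveRecord::Base.connection.select_value(sql) # number of distinct project_ids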
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index d06a73da8ac..3a94e109d2a 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -5,6 +5,7 @@ module Gitlab
   module MigrationHelpers
     include Migrations::BackgroundMigrationHelpers
     include DynamicModelHelpers
+    include RenameTableHelpers

     # https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
     MAX_IDENTIFIER_NAME_LENGTH = 63
@@ -51,7 +52,7 @@ module Gitlab
          allow_null: options[:null]
        )
      else
-       add_column(table_name, column_name, :datetime_with_timezone, options)
+       add_column(table_name, column_name, :datetime_with_timezone, **options)
      end
    end
  end
@@ -143,13 +144,13 @@ module Gitlab

     options = options.merge({ algorithm: :concurrently })

-    if index_exists?(table_name, column_name, options)
+    if index_exists?(table_name, column_name, **options)
       Gitlab::AppLogger.warn "Index not created because it already exists (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}"
       return
     end

     disable_statement_timeout do
-      add_index(table_name, column_name, options)
+      add_index(table_name, column_name, **options)
     end
   end
@@ -169,13 +170,13 @@ module Gitlab

     options = options.merge({ algorithm: :concurrently })

-    unless index_exists?(table_name, column_name, options)
+    unless index_exists?(table_name, column_name, **options)
       Gitlab::AppLogger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}"
       return
     end

     disable_statement_timeout do
-      remove_index(table_name, options.merge({ column: column_name }))
+      remove_index(table_name, **options.merge({ column: column_name }))
     end
   end
@@ -205,7 +206,7 @@ module Gitlab
     end

     disable_statement_timeout do
-      remove_index(table_name, options.merge({ name: index_name }))
+      remove_index(table_name, **options.merge({ name: index_name }))
     end
   end
@@ -565,7 +566,7 @@ module Gitlab

     check_trigger_permissions!(table)

-    remove_rename_triggers_for_postgresql(table, trigger_name)
+    remove_rename_triggers(table, trigger_name)

     remove_column(table, new)
   end
@@ -576,8 +577,19 @@ module Gitlab
   # table - The name of the table to install the trigger in.
   # old_column - The name of the old column.
   # new_column - The name of the new column.
-  def install_rename_triggers(table, old_column, new_column)
-    install_rename_triggers_for_postgresql(table, old_column, new_column)
+  # trigger_name - The name of the trigger to use (optional).
+  def install_rename_triggers(table, old, new, trigger_name: nil)
+    Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).create(old, new, trigger_name: trigger_name)
+  end
+
+  # Removes the triggers used for renaming a column concurrently.
+  def remove_rename_triggers(table, trigger)
+    Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).drop(trigger)
+  end
+
+  # Returns the (base) name to use for triggers when renaming columns.
+  def rename_trigger_name(table, old, new)
+    Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).name(old, new)
   end

   # Changes the type of a column concurrently.
@@ -663,7 +675,7 @@ module Gitlab

       install_rename_triggers(table, column, temp_column)
     end
-  rescue
+  rescue StandardError
     # create_column_from can not run inside a transaction, which means
     # that there is a risk that if any of the operations that follow it
     # fail, we'll be left with an inconsistent schema
@@ -690,7 +702,7 @@ module Gitlab

     check_trigger_permissions!(table)

-    remove_rename_triggers_for_postgresql(table, trigger_name)
+    remove_rename_triggers(table, trigger_name)

     remove_column(table, old)
   end
@@ -905,7 +917,11 @@ module Gitlab
     end
   end

-  # Initializes the conversion of an integer column to bigint
+  def convert_to_bigint_column(column)
+    "#{column}_convert_to_bigint"
+  end
+
+  # Initializes the conversion of a set of integer columns to bigint
   #
   # It can be used for converting both a Primary Key and any Foreign Keys
   # that may reference it or any other integer column that we may want to
   #
   # Note: this helper is intended to be used in a regular (pre-deployment) migration.
   #
   # This helper is part 1 of a multi-step migration process:
-  # 1. initialize_conversion_of_integer_to_bigint to create the new column and database triggers
+  # 1. initialize_conversion_of_integer_to_bigint to create the new columns and database trigger
   # 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
   # 3. remaining steps TBD, see #288005
   #
   # table - The name of the database table containing the column
-  # column - The name of the column that we want to convert to bigint.
+  # columns - The name, or array of names, of the column(s) that we want to convert to bigint.
   # primary_key - The name of the primary key column (most often :id)
-  def initialize_conversion_of_integer_to_bigint(table, column, primary_key: :id)
+  def initialize_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
     unless table_exists?(table)
       raise "Table #{table} does not exist"
     end
@@ -939,34 +955,54 @@ module Gitlab
       raise "Column #{primary_key} does not exist on #{table}"
     end

-    unless column_exists?(table, column)
-      raise "Column #{column} does not exist on #{table}"
+    columns = Array.wrap(columns)
+    columns.each do |column|
+      next if column_exists?(table, column)
+
+      raise ArgumentError, "Column #{column} does not exist on #{table}"
     end

     check_trigger_permissions!(table)

-    old_column = column_for(table, column)
-    tmp_column = "#{column}_convert_to_bigint"
+    conversions = columns.to_h { |column| [column, convert_to_bigint_column(column)] }

     with_lock_retries do
-      if (column.to_s == primary_key.to_s) || !old_column.null
-        # If the column to be converted is either a PK or is defined as NOT NULL,
-        # set it to `NOT NULL DEFAULT 0` and we'll copy paste the correct values bellow
-        # That way, we skip the expensive validation step required to add
-        # a NOT NULL constraint at the end of the process
-        add_column(table, tmp_column, :bigint, default: old_column.default || 0, null: false)
-      else
-        add_column(table, tmp_column, :bigint, default: old_column.default)
+      conversions.each do |(source_column, temporary_name)|
+        column = column_for(table, source_column)
+
+        if (column.name.to_s == primary_key.to_s) || !column.null
+          # If the column to be converted is either a PK or is defined as NOT NULL,
+          # set it to `NOT NULL DEFAULT 0` and we'll copy-paste the correct values below
+          # That way, we skip the expensive validation step required to add
+          # a NOT NULL constraint at the end of the process
+          add_column(table, temporary_name, :bigint, default: column.default || 0, null: false)
+        else
+          add_column(table, temporary_name, :bigint, default: column.default)
+        end
       end

-      install_rename_triggers(table, column, tmp_column)
+      install_rename_triggers(table, conversions.keys, conversions.values)
     end
   end

-  # Backfills the new column used in the conversion of an integer column to bigint using background migrations.
+  # Reverts `initialize_conversion_of_integer_to_bigint`
+  #
+  # table - The name of the database table containing the columns
+  # columns - The name, or array of names, of the column(s) that we're converting to bigint.
+  def revert_initialize_conversion_of_integer_to_bigint(table, columns)
+    columns = Array.wrap(columns)
+    temporary_columns = columns.map { |column| convert_to_bigint_column(column) }
+
+    trigger_name = rename_trigger_name(table, columns, temporary_columns)
+    remove_rename_triggers(table, trigger_name)
+
+    temporary_columns.each { |column| remove_column(table, column) }
+  end
+
+  # Backfills the new columns used in an integer-to-bigint conversion using background migrations.
   #
   # - This helper should be called from a post-deployment migration.
-  # - In order for this helper to work properly, the new column must be first initialized with
+  # - In order for this helper to work properly, the new columns must be first initialized with
   #   the `initialize_conversion_of_integer_to_bigint` helper.
   # - It tracks the scheduled background jobs through Gitlab::Database::BackgroundMigration::BatchedMigration,
   #   which allows a more thorough check that all jobs succeeded in the
@@ -976,12 +1012,12 @@ module Gitlab
   #   deployed (including background job changes) before we begin processing the background migration.
   #
   # This helper is part 2 of a multi-step migration process:
-  # 1. initialize_conversion_of_integer_to_bigint to create the new column and database triggers
+  # 1. initialize_conversion_of_integer_to_bigint to create the new columns and database trigger
   # 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
   # 3. remaining steps TBD, see #288005
   #
   # table - The name of the database table containing the column
-  # column - The name of the column that we want to convert to bigint.
+  # columns - The name, or an array of names, of the column(s) we want to convert to bigint.
   # primary_key - The name of the primary key column (most often :id)
   # batch_size - The number of rows to schedule in a single background migration
   # sub_batch_size - The smaller batches that will be used by each scheduled job
@@ -1001,7 +1037,7 @@ module Gitlab
   #   between the scheduled jobs
   def backfill_conversion_of_integer_to_bigint(
     table,
-    column,
+    columns,
     primary_key: :id,
     batch_size: 20_000,
     sub_batch_size: 1000,
@@ -1016,46 +1052,43 @@ module Gitlab
       raise "Column #{primary_key} does not exist on #{table}"
     end

-    unless column_exists?(table, column)
-      raise "Column #{column} does not exist on #{table}"
-    end
+    conversions = Array.wrap(columns).to_h do |column|
+      raise ArgumentError, "Column #{column} does not exist on #{table}" unless column_exists?(table, column)

-    tmp_column = "#{column}_convert_to_bigint"
+      temporary_name = convert_to_bigint_column(column)
+      raise ArgumentError, "Column #{temporary_name} does not exist on #{table}" unless column_exists?(table, temporary_name)

-    unless column_exists?(table, tmp_column)
-      raise 'The temporary column does not exist, initialize it with `initialize_conversion_of_integer_to_bigint`'
+      [column, temporary_name]
     end

-    batched_migration = queue_batched_background_migration(
+    queue_batched_background_migration(
       'CopyColumnUsingBackgroundMigrationJob',
       table,
       primary_key,
-      column,
-      tmp_column,
+      conversions.keys,
+      conversions.values,
       job_interval: interval,
       batch_size: batch_size,
       sub_batch_size: sub_batch_size)
-
-    if perform_background_migration_inline?
-      # To ensure the schema is up to date immediately we perform the
-      # migration inline in dev / test environments.
-      Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.new.run_entire_migration(batched_migration)
-    end
   end

-  # Performs a concurrent column rename when using PostgreSQL.
-  def install_rename_triggers_for_postgresql(table, old, new, trigger_name: nil)
-    Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).create(old, new, trigger_name: trigger_name)
-  end
+  # Reverts `backfill_conversion_of_integer_to_bigint`
+  #
+  # table - The name of the database table containing the column
+  # columns - The name, or an array of names, of the column(s) we want to convert to bigint.
+  # primary_key - The name of the primary key column (most often :id)
+  def revert_backfill_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
+    columns = Array.wrap(columns)

-  # Removes the triggers used for renaming a PostgreSQL column concurrently.
-  def remove_rename_triggers_for_postgresql(table, trigger)
-    Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).drop(trigger)
-  end
+    conditions = ActiveRecord::Base.sanitize_sql([
+      'job_class_name = :job_class_name AND table_name = :table_name AND column_name = :column_name AND job_arguments = :job_arguments',
+      job_class_name: 'CopyColumnUsingBackgroundMigrationJob',
+      table_name: table,
+      column_name: primary_key,
+      job_arguments: [columns, columns.map { |column| convert_to_bigint_column(column) }].to_json
+    ])

-  # Returns the (base) name to use for triggers when renaming columns.
-  def rename_trigger_name(table, old, new)
-    Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).name(old, new)
+    execute("DELETE FROM batched_background_migrations WHERE #{conditions}")
   end
@@ -1162,8 +1195,8 @@ module Gitlab
     end
   end

-  def remove_foreign_key_without_error(*args)
-    remove_foreign_key(*args)
+  def remove_foreign_key_without_error(*args, **kwargs)
+    remove_foreign_key(*args, **kwargs)
   rescue ArgumentError
   end
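The two bigint helpers are meant to be called from separate migrations, as the comments above prescribe. A hypothetical sketch of that pairing (table and column names are made up; only the helper calls and their signatures come from this diff):

  # Part 1, a regular (pre-deployment) migration:
  class InitializeConversionOfEventsToBigint < ActiveRecord::Migration[6.0]
    include Gitlab::Database::MigrationHelpers

    disable_ddl_transaction! # assumed here; commonly paired with with_lock_retries-based helpers

    def up
      initialize_conversion_of_integer_to_bigint(:events, [:id, :project_id])
    end

    def down
      revert_initialize_conversion_of_integer_to_bigint(:events, [:id, :project_id])
    end
  end

  # Part 2, a post-deployment migration:
  class BackfillEventsForBigintConversion < ActiveRecord::Migration[6.0]
    include Gitlab::Database::MigrationHelpers

    def up
      backfill_conversion_of_integer_to_bigint(:events, [:id, :project_id], batch_size: 20_000, sub_batch_size: 1000)
    end

    def down
      revert_backfill_conversion_of_integer_to_bigint(:events, [:id, :project_id])
    end
  end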
diff --git a/lib/gitlab/database/migration_helpers/cascading_namespace_settings.rb b/lib/gitlab/database/migration_helpers/cascading_namespace_settings.rb
new file mode 100644
index 00000000000..eecf96acb30
--- /dev/null
+++ b/lib/gitlab/database/migration_helpers/cascading_namespace_settings.rb
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    module MigrationHelpers
+      module CascadingNamespaceSettings
+        include Gitlab::Database::MigrationHelpers
+
+        # Creates the four required columns that constitute a single cascading
+        # namespace settings attribute. This helper is only appropriate if the
+        # setting is not already present as a non-cascading attribute.
+        #
+        # Creates the `setting_name` column along with the `lock_setting_name`
+        # column in both `namespace_settings` and `application_settings`.
+        #
+        # This helper is not reversible and must be defined in conjunction with
+        # `remove_cascading_namespace_setting` in separate up and down directions.
+        #
+        # setting_name - The name of the cascading attribute - same as defined
+        #                in `NamespaceSetting` with the `cascading_attr` method.
+        # type - The column type for the setting itself (:boolean, :integer, etc.)
+        # options - Standard Rails column options hash. Accepts keys such as
+        #           `null` and `default`.
+        #
+        # `null` and `default` options will only be applied to the `application_settings`
+        # column. In most cases, a non-null default value should be specified.
+        def add_cascading_namespace_setting(setting_name, type, **options)
+          lock_column_name = "lock_#{setting_name}".to_sym
+
+          check_cascading_namespace_setting_consistency(setting_name, lock_column_name)
+
+          namespace_options = options.merge(null: true, default: nil)
+
+          with_lock_retries do
+            add_column(:namespace_settings, setting_name, type, namespace_options)
+            add_column(:namespace_settings, lock_column_name, :boolean, default: false, null: false)
+          end
+
+          add_column(:application_settings, setting_name, type, options)
+          add_column(:application_settings, lock_column_name, :boolean, default: false, null: false)
+        end
+
+        def remove_cascading_namespace_setting(setting_name)
+          lock_column_name = "lock_#{setting_name}".to_sym
+
+          with_lock_retries do
+            remove_column(:namespace_settings, setting_name) if column_exists?(:namespace_settings, setting_name)
+            remove_column(:namespace_settings, lock_column_name) if column_exists?(:namespace_settings, lock_column_name)
+          end
+
+          remove_column(:application_settings, setting_name) if column_exists?(:application_settings, setting_name)
+          remove_column(:application_settings, lock_column_name) if column_exists?(:application_settings, lock_column_name)
+        end
+
+        private
+
+        def check_cascading_namespace_setting_consistency(setting_name, lock_name)
+          existing_columns = []
+
+          %w(namespace_settings application_settings).each do |table|
+            existing_columns << "#{table}.#{setting_name}" if column_exists?(table.to_sym, setting_name)
+            existing_columns << "#{table}.#{lock_name}" if column_exists?(table.to_sym, lock_name)
+          end
+
+          return if existing_columns.empty?
+
+          raise <<~ERROR
+            One or more cascading namespace columns already exist. `add_cascading_namespace_setting` helper
+            can only be used for new settings, when none of the required columns already exist.
+            Existing columns: #{existing_columns.join(', ')}
+          ERROR
+        end
+      end
+    end
+  end
+end
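A hypothetical migration pairing the two helpers in separate up/down directions, as the comment above requires (the setting name is illustrative):

  class AddDelayedProjectRemovalCascadingSetting < ActiveRecord::Migration[6.0]
    include Gitlab::Database::MigrationHelpers::CascadingNamespaceSettings

    def up
      # null/default apply only to application_settings; namespace_settings
      # columns are forced to be nullable with a nil default.
      add_cascading_namespace_setting(:delayed_project_removal, :boolean, default: false, null: false)
    end

    def down
      remove_cascading_namespace_setting(:delayed_project_removal)
    end
  end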
diff --git a/lib/gitlab/database/migrations/instrumentation.rb b/lib/gitlab/database/migrations/instrumentation.rb
index 959028ce00b..e9ef80d5198 100644
--- a/lib/gitlab/database/migrations/instrumentation.rb
+++ b/lib/gitlab/database/migrations/instrumentation.rb
@@ -4,6 +4,9 @@ module Gitlab
   module Database
     module Migrations
       class Instrumentation
+        RESULT_DIR = Rails.root.join('tmp', 'migration-testing').freeze
+        STATS_FILENAME = 'migration-stats.json'
+
         attr_reader :observations

         def initialize(observers = ::Gitlab::Database::Migrations::Observers.all_observers)
@@ -21,7 +24,7 @@ module Gitlab
          observation.walltime = Benchmark.realtime do
            yield
-         rescue => e
+         rescue StandardError => e
            exception = e
            observation.success = false
          end
@@ -47,7 +50,7 @@ module Gitlab
        def on_each_observer(&block)
          observers.each do |observer|
            yield observer
-         rescue => e
+         rescue StandardError => e
            Gitlab::AppLogger.error("Migration observer #{observer.class} failed with: #{e}")
          end
        end
diff --git a/lib/gitlab/database/migrations/observers.rb b/lib/gitlab/database/migrations/observers.rb
index 592993aeac5..b65a303ef30 100644
--- a/lib/gitlab/database/migrations/observers.rb
+++ b/lib/gitlab/database/migrations/observers.rb
@@ -7,7 +7,8 @@ module Gitlab
      def self.all_observers
        [
          TotalDatabaseSizeChange.new,
-         QueryStatistics.new
+         QueryStatistics.new,
+         QueryLog.new
        ]
      end
    end
diff --git a/lib/gitlab/database/migrations/observers/query_log.rb b/lib/gitlab/database/migrations/observers/query_log.rb
new file mode 100644
index 00000000000..45df07fe391
--- /dev/null
+++ b/lib/gitlab/database/migrations/observers/query_log.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    module Migrations
+      module Observers
+        class QueryLog < MigrationObserver
+          def before
+            @logger_was = ActiveRecord::Base.logger
+            @log_file_path = File.join(Instrumentation::RESULT_DIR, 'current.log')
+            @logger = Logger.new(@log_file_path)
+            ActiveRecord::Base.logger = @logger
+          end
+
+          def after
+            ActiveRecord::Base.logger = @logger_was
+            @logger.close
+          end
+
+          def record(observation)
+            File.rename(@log_file_path, File.join(Instrumentation::RESULT_DIR, "#{observation.migration}.log"))
+          end
+        end
+      end
+    end
+  end
+end
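A rough sketch of how an observer like QueryLog ends up producing a per-migration query log; the observe call and its argument are assumptions here, inferred from the observer interface rather than shown in this diff:

  instrumentation = Gitlab::Database::Migrations::Instrumentation.new

  instrumentation.observe('20210428000000_example_migration') do
    # The migration body runs here. QueryLog#before has already pointed
    # ActiveRecord::Base.logger at tmp/migration-testing/current.log, so every
    # query is captured; #after restores the previous logger, and #record
    # renames the file to 20210428000000_example_migration.log.
  end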
diff --git a/lib/gitlab/database/partitioning/partition_creator.rb b/lib/gitlab/database/partitioning/partition_creator.rb
index 547e0b9b957..d4b2b8d50e2 100644
--- a/lib/gitlab/database/partitioning/partition_creator.rb
+++ b/lib/gitlab/database/partitioning/partition_creator.rb
@@ -38,7 +38,7 @@ module Gitlab

          create(model, partitions_to_create)
        end
-     rescue => e
+     rescue StandardError => e
        Gitlab::AppLogger.error("Failed to create partition(s) for #{model.table_name}: #{e.class}: #{e.message}")
      end
    end
diff --git a/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
index 0bc1343acca..c0cc97de276 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
@@ -40,7 +40,7 @@ module Gitlab
        end

        with_lock_retries do
-         add_index(table_name, column_names, options)
+         add_index(table_name, column_names, **options)
        end
      end
diff --git a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
index e8b49c7f62c..aa46b98be5d 100644
--- a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
+++ b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
@@ -69,10 +69,8 @@ module Gitlab
        hll_buckets = Buckets.new

        while batch_start <= finish
-         begin
-           hll_buckets.merge_hash!(hll_buckets_for_batch(batch_start, batch_start + batch_size))
-           batch_start += batch_size
-         end
+         hll_buckets.merge_hash!(hll_buckets_for_batch(batch_start, batch_start + batch_size))
+         batch_start += batch_size

          sleep(SLEEP_TIME_IN_SECONDS)
        end
diff --git a/lib/gitlab/database/reindexing/concurrent_reindex.rb b/lib/gitlab/database/reindexing/concurrent_reindex.rb
index a6fe7d61a4f..7e2dd55d21b 100644
--- a/lib/gitlab/database/reindexing/concurrent_reindex.rb
+++ b/lib/gitlab/database/reindexing/concurrent_reindex.rb
@@ -11,7 +11,14 @@ module Gitlab
      PG_IDENTIFIER_LENGTH = 63
      TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
      REPLACED_INDEX_PREFIX = 'old_reindex_'
-     STATEMENT_TIMEOUT = 6.hours
+     STATEMENT_TIMEOUT = 9.hours
+
+     # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
+     # which only conflicts with DDL and vacuum. We therefore execute this with a rather
+     # high lock timeout and a long pause in between retries. This is an alternative to
+     # setting a high statement timeout, which would lead to a long running query with effects
+     # on e.g. vacuum.
+     REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30

      attr_reader :index, :logger
@@ -70,7 +77,7 @@ module Gitlab
      ensure
        begin
          remove_index(index.schema, replacement_index_name)
-       rescue => e
+       rescue StandardError => e
          logger.error(e)
        end
      end
@@ -95,7 +102,13 @@ module Gitlab
      def remove_index(schema, name)
        logger.info("Removing index #{schema}.#{name}")

-       set_statement_timeout do
+       retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
+         timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
+         klass: self.class,
+         logger: logger
+       )
+
+       retries.run(raise_on_exhaustion: false) do
          connection.execute(<<~SQL)
            DROP INDEX CONCURRENTLY
            IF EXISTS #{quote_table_name(schema)}.#{quote_table_name(name)}
@@ -121,7 +134,6 @@ module Gitlab

      def with_lock_retries(&block)
        arguments = { klass: self.class, logger: logger }
-
        Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
      end
diff --git a/lib/gitlab/database/reindexing/coordinator.rb b/lib/gitlab/database/reindexing/coordinator.rb
index 7a7d17ca196..d68f47b5b6c 100644
--- a/lib/gitlab/database/reindexing/coordinator.rb
+++ b/lib/gitlab/database/reindexing/coordinator.rb
@@ -42,7 +42,7 @@ module Gitlab

      def perform_for(index, action)
        ConcurrentReindex.new(index).perform
-     rescue
+     rescue StandardError
        action.state = :failed

        raise
diff --git a/lib/gitlab/database/reindexing/grafana_notifier.rb b/lib/gitlab/database/reindexing/grafana_notifier.rb
index b1e5ecb9ade..f4ea59deb50 100644
--- a/lib/gitlab/database/reindexing/grafana_notifier.rb
+++ b/lib/gitlab/database/reindexing/grafana_notifier.rb
@@ -53,7 +53,7 @@ module Gitlab
        log_error("Response code #{response.code}") unless success

        success
-     rescue => err
+     rescue StandardError => err
        log_error(err)

        false
diff --git a/lib/gitlab/database/rename_table_helpers.rb b/lib/gitlab/database/rename_table_helpers.rb
new file mode 100644
index 00000000000..7f5af038c6d
--- /dev/null
+++ b/lib/gitlab/database/rename_table_helpers.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    module RenameTableHelpers
+      def rename_table_safely(old_table_name, new_table_name)
+        with_lock_retries do
+          rename_table(old_table_name, new_table_name)
+          execute("CREATE VIEW #{old_table_name} AS SELECT * FROM #{new_table_name}")
+        end
+      end
+
+      def undo_rename_table_safely(old_table_name, new_table_name)
+        with_lock_retries do
+          execute("DROP VIEW IF EXISTS #{old_table_name}")
+          rename_table(new_table_name, old_table_name)
+        end
+      end
+
+      def finalize_table_rename(old_table_name, new_table_name)
+        with_lock_retries do
+          execute("DROP VIEW IF EXISTS #{old_table_name}")
+        end
+      end
+
+      def undo_finalize_table_rename(old_table_name, new_table_name)
+        with_lock_retries do
+          execute("CREATE VIEW #{old_table_name} AS SELECT * FROM #{new_table_name}")
+        end
+      end
+    end
+  end
+end
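A hypothetical sketch of the intended lifecycle (table names made up): rename the table while a compatibility view keeps the old name readable, then drop the view in a later milestone. The schema-cache patch below suggests such renames are also registered in Gitlab::Database::TABLES_TO_BE_RENAMED.

  class RenameServicesToIntegrations < ActiveRecord::Migration[6.0]
    include Gitlab::Database::MigrationHelpers # RenameTableHelpers is included there per this diff

    def up
      rename_table_safely(:services, :integrations)
    end

    def down
      undo_rename_table_safely(:services, :integrations)
    end
  end

  # In a later milestone, a follow-up migration would call
  # finalize_table_rename(:services, :integrations) to drop the view.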
execute("CREATE VIEW #{old_table_name} AS SELECT * FROM #{new_table_name}") + end + end + + def undo_rename_table_safely(old_table_name, new_table_name) + with_lock_retries do + execute("DROP VIEW IF EXISTS #{old_table_name}") + rename_table(new_table_name, old_table_name) + end + end + + def finalize_table_rename(old_table_name, new_table_name) + with_lock_retries do + execute("DROP VIEW IF EXISTS #{old_table_name}") + end + end + + def undo_finalize_table_rename(old_table_name, new_table_name) + with_lock_retries do + execute("CREATE VIEW #{old_table_name} AS SELECT * FROM #{new_table_name}") + end + end + end + end +end diff --git a/lib/gitlab/database/schema_cache_with_renamed_table.rb b/lib/gitlab/database/schema_cache_with_renamed_table.rb new file mode 100644 index 00000000000..28123edd708 --- /dev/null +++ b/lib/gitlab/database/schema_cache_with_renamed_table.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module SchemaCacheWithRenamedTable + # Override methods in ActiveRecord::ConnectionAdapters::SchemaCache + + def clear! + super + + clear_renamed_tables_cache! + end + + def clear_data_source_cache!(name) + super(name) + + clear_renamed_tables_cache! + end + + def primary_keys(table_name) + super(underlying_table(table_name)) + end + + def columns(table_name) + super(underlying_table(table_name)) + end + + def columns_hash(table_name) + super(underlying_table(table_name)) + end + + def indexes(table_name) + super(underlying_table(table_name)) + end + + private + + def underlying_table(table_name) + renamed_tables_cache.fetch(table_name, table_name) + end + + def renamed_tables_cache + @renamed_tables ||= begin + Gitlab::Database::TABLES_TO_BE_RENAMED.select do |old_name, new_name| + ActiveRecord::Base.connection.view_exists?(old_name) + end + end + end + + def clear_renamed_tables_cache! + @renamed_tables = nil # rubocop:disable Gitlab/ModuleWithInstanceVariables + end + end + end +end diff --git a/lib/gitlab/database/with_lock_retries.rb b/lib/gitlab/database/with_lock_retries.rb index 3fb52d786ad..bbf8f133f0f 100644 --- a/lib/gitlab/database/with_lock_retries.rb +++ b/lib/gitlab/database/with_lock_retries.rb @@ -92,7 +92,7 @@ module Gitlab end begin - run_block_with_transaction + run_block_with_lock_timeout rescue ActiveRecord::LockWaitTimeout if retry_with_lock_timeout? disable_idle_in_transaction_timeout if ActiveRecord::Base.connection.transaction_open? @@ -121,7 +121,7 @@ module Gitlab block.call end - def run_block_with_transaction + def run_block_with_lock_timeout ActiveRecord::Base.transaction(requires_new: true) do execute("SET LOCAL lock_timeout TO '#{current_lock_timeout_in_ms}ms'") diff --git a/lib/gitlab/database/with_lock_retries_outside_transaction.rb b/lib/gitlab/database/with_lock_retries_outside_transaction.rb new file mode 100644 index 00000000000..175cc493e36 --- /dev/null +++ b/lib/gitlab/database/with_lock_retries_outside_transaction.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +module Gitlab + module Database + # This retry method behaves similar to WithLockRetries + # except it does not wrap itself into a transaction scope. + # + # In our context, this is only useful if directly connected to + # PostgreSQL. When going through pgbouncer, this method **won't work** + # as it relies on using `SET` outside transactions (and hence can be + # multiplexed across different connections). 
diff --git a/lib/gitlab/database/with_lock_retries_outside_transaction.rb b/lib/gitlab/database/with_lock_retries_outside_transaction.rb
new file mode 100644
index 00000000000..175cc493e36
--- /dev/null
+++ b/lib/gitlab/database/with_lock_retries_outside_transaction.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    # This retry method behaves similarly to WithLockRetries,
+    # except it does not wrap itself in a transaction scope.
+    #
+    # In our context, this is only useful if directly connected to
+    # PostgreSQL. When going through pgbouncer, this method **won't work**
+    # as it relies on using `SET` outside transactions (and hence can be
+    # multiplexed across different connections).
+    class WithLockRetriesOutsideTransaction < WithLockRetries
+      private
+
+      def run_block_with_lock_timeout
+        execute("SET lock_timeout TO '#{current_lock_timeout_in_ms}ms'")
+
+        log(message: 'Lock timeout is set', current_iteration: current_iteration, lock_timeout_in_ms: current_lock_timeout_in_ms)
+
+        run_block
+
+        log(message: 'Migration finished', current_iteration: current_iteration, lock_timeout_in_ms: current_lock_timeout_in_ms)
+      end
+
+      def run_block_without_lock_timeout
+        log(message: "Couldn't acquire lock to perform the migration", current_iteration: current_iteration)
+        log(message: "Executing without lock timeout", current_iteration: current_iteration)
+
+        disable_lock_timeout
+
+        run_block
+
+        log(message: 'Migration finished', current_iteration: current_iteration)
+      end
+
+      def disable_lock_timeout
+        execute("SET lock_timeout TO '0'")
+      end
+    end
+  end
+end
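A usage sketch mirroring how ConcurrentReindex configures this class in the reindexing hunk above; the timing configuration is a list of [lock timeout, sleep between retries] pairs (semantics inferred from WithLockRetries, an assumption here):

  retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
    timing_configuration: [[1.minute, 9.minutes]] * 30,
    klass: self.class,
    logger: Gitlab::AppLogger
  )

  retries.run(raise_on_exhaustion: false) do
    # Runs outside a transaction, so statements like DROP INDEX CONCURRENTLY
    # are allowed here - but only on a direct PostgreSQL connection.
    ActiveRecord::Base.connection.execute('DROP INDEX CONCURRENTLY IF EXISTS "some_index"')
  end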