author    GitLab Bot <gitlab-bot@gitlab.com>  2021-05-19 15:44:42 +0000
committer GitLab Bot <gitlab-bot@gitlab.com>  2021-05-19 15:44:42 +0000
commit    4555e1b21c365ed8303ffb7a3325d773c9b8bf31 (patch)
tree      5423a1c7516cffe36384133ade12572cf709398d /lib/gitlab/database
parent    e570267f2f6b326480d284e0164a6464ba4081bc (diff)
download  gitlab-ce-4555e1b21c365ed8303ffb7a3325d773c9b8bf31.tar.gz

Add latest changes from gitlab-org/gitlab@13-12-stable-ee (tag: v13.12.0-rc42)
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--  lib/gitlab/database/as_with_materialized.rb                             |  14
-rw-r--r--  lib/gitlab/database/background_migration/batch_optimizer.rb             |  67
-rw-r--r--  lib/gitlab/database/background_migration/batched_job.rb                 |  27
-rw-r--r--  lib/gitlab/database/background_migration/batched_migration.rb           |  41
-rw-r--r--  lib/gitlab/database/background_migration/batched_migration_runner.rb    |  24
-rw-r--r--  lib/gitlab/database/background_migration/batched_migration_wrapper.rb   |  44
-rw-r--r--  lib/gitlab/database/background_migration_job.rb                         |   2
-rw-r--r--  lib/gitlab/database/consistency.rb                                      |   2
-rw-r--r--  lib/gitlab/database/loose_index_scan_distinct_count.rb                  |   4
-rw-r--r--  lib/gitlab/database/migration_helpers.rb                                | 155
-rw-r--r--  lib/gitlab/database/migration_helpers/cascading_namespace_settings.rb   |  76
-rw-r--r--  lib/gitlab/database/migrations/instrumentation.rb                       |   7
-rw-r--r--  lib/gitlab/database/migrations/observers.rb                             |   3
-rw-r--r--  lib/gitlab/database/migrations/observers/query_log.rb                   |  27
-rw-r--r--  lib/gitlab/database/partitioning/partition_creator.rb                   |   2
-rw-r--r--  lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb     |   2
-rw-r--r--  lib/gitlab/database/postgres_hll/batch_distinct_counter.rb              |   6
-rw-r--r--  lib/gitlab/database/reindexing/concurrent_reindex.rb                    |  20
-rw-r--r--  lib/gitlab/database/reindexing/coordinator.rb                           |   2
-rw-r--r--  lib/gitlab/database/reindexing/grafana_notifier.rb                      |   2
-rw-r--r--  lib/gitlab/database/rename_table_helpers.rb                             |  33
-rw-r--r--  lib/gitlab/database/schema_cache_with_renamed_table.rb                  |  55
-rw-r--r--  lib/gitlab/database/with_lock_retries.rb                                |   4
-rw-r--r--  lib/gitlab/database/with_lock_retries_outside_transaction.rb            |  41
24 files changed, 545 insertions, 115 deletions
diff --git a/lib/gitlab/database/as_with_materialized.rb b/lib/gitlab/database/as_with_materialized.rb
index 7c45f416638..e7e3c1766a9 100644
--- a/lib/gitlab/database/as_with_materialized.rb
+++ b/lib/gitlab/database/as_with_materialized.rb
@@ -3,19 +3,15 @@
module Gitlab
module Database
# This class is a special Arel node which allows optionally defining the `MATERIALIZED` keyword for CTE and Recursive CTE queries.
- class AsWithMaterialized < Arel::Nodes::Binary
+ class AsWithMaterialized < Arel::Nodes::As
extend Gitlab::Utils::StrongMemoize
- MATERIALIZED = Arel.sql(' MATERIALIZED')
- EMPTY_STRING = Arel.sql('')
- attr_reader :expr
+ MATERIALIZED = 'MATERIALIZED '
def initialize(left, right, materialized: true)
- @expr = if materialized && self.class.materialized_supported?
- MATERIALIZED
- else
- EMPTY_STRING
- end
+ if materialized && self.class.materialized_supported?
+ right.prepend(MATERIALIZED)
+ end
super(left, right)
end
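
For context: PostgreSQL 12 added the option to mark a CTE as explicitly
materialized. A minimal sketch (not part of the commit; names are
illustrative) of how this node is typically constructed and what it renders:

    table = Arel::Table.new(:issues_cte)
    query = Arel.sql('(SELECT * FROM issues WHERE project_id = 1)')

    cte = Gitlab::Database::AsWithMaterialized.new(table, query)
    # Embedded in a WITH clause on PostgreSQL 12+, this renders as:
    #   WITH "issues_cte" AS MATERIALIZED (SELECT * FROM issues WHERE project_id = 1)
    # On PostgreSQL 11 and earlier, materialized_supported? is false and the
    # keyword is omitted, since those versions do not recognize it.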
diff --git a/lib/gitlab/database/background_migration/batch_optimizer.rb b/lib/gitlab/database/background_migration/batch_optimizer.rb
new file mode 100644
index 00000000000..0668490dda8
--- /dev/null
+++ b/lib/gitlab/database/background_migration/batch_optimizer.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module BackgroundMigration
+ # This is an optimizer for throughput of batched migration jobs
+ #
+ # The underlying mechanic is based on the concept of time efficiency:
+ # time efficiency = job duration / interval
+ # Ideally, this is close to, but lower than, 1 - so we're using time efficiently.
+ #
+ # We aim to land in the 90%-95% range, which gives the database a little breathing room
+ # in between.
+ #
+ # The optimizer calculates the exponential moving average of time efficiencies for the last N jobs.
+ # If we're outside the range, it rescales the batch size toward the target, capping any increase at 20%.
+ class BatchOptimizer
+ # Target time efficiency for a job
+ # Time efficiency is defined as: job duration / interval
+ TARGET_EFFICIENCY = (0.9..0.95).freeze
+
+ # Lower and upper bound for the batch size
+ ALLOWED_BATCH_SIZE = (1_000..2_000_000).freeze
+
+ # Limit for the multiplier of the batch size
+ MAX_MULTIPLIER = 1.2
+
+ # When smoothing time efficiency, use this many jobs
+ NUMBER_OF_JOBS = 20
+
+ # Smoothing factor for exponential moving average
+ EMA_ALPHA = 0.4
+
+ attr_reader :migration, :number_of_jobs, :ema_alpha
+
+ def initialize(migration, number_of_jobs: NUMBER_OF_JOBS, ema_alpha: EMA_ALPHA)
+ @migration = migration
+ @number_of_jobs = number_of_jobs
+ @ema_alpha = ema_alpha
+ end
+
+ def optimize!
+ return unless Feature.enabled?(:optimize_batched_migrations, type: :ops, default_enabled: :yaml)
+
+ if multiplier = batch_size_multiplier
+ migration.batch_size = (migration.batch_size * multiplier).to_i.clamp(ALLOWED_BATCH_SIZE)
+ migration.save!
+ end
+ end
+
+ private
+
+ def batch_size_multiplier
+ efficiency = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs, alpha: ema_alpha)
+
+ return if efficiency.nil? || efficiency == 0
+
+ # We hit the range - no change
+ return if TARGET_EFFICIENCY.include?(efficiency)
+
+ # Assumption: time efficiency is linear in the batch size
+ [TARGET_EFFICIENCY.max / efficiency, MAX_MULTIPLIER].min
+ end
+ end
+ end
+ end
+end
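
To make the multiplier arithmetic above concrete, here is the same
calculation in isolation - plain Ruby with illustrative numbers, not part of
the commit:

    target         = (0.9..0.95) # TARGET_EFFICIENCY
    max_multiplier = 1.2         # MAX_MULTIPLIER

    # Jobs finish quickly relative to the interval: scale up, capped at +20%.
    [target.max / 0.5, max_multiplier].min    # => 1.2

    # Jobs overrun the interval: scale down proportionally toward the target.
    [target.max / 1.2, max_multiplier].min    # => ~0.79

    # The resulting batch size is kept within ALLOWED_BATCH_SIZE.
    (50_000 * 0.79).to_i.clamp(1_000, 2_000_000) # => 39_500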
diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb
index 3b624df2bfd..869b97b8ac0 100644
--- a/lib/gitlab/database/background_migration/batched_job.rb
+++ b/lib/gitlab/database/background_migration/batched_job.rb
@@ -4,10 +4,23 @@ module Gitlab
module Database
module BackgroundMigration
class BatchedJob < ActiveRecord::Base # rubocop:disable Rails/ApplicationRecord
+ include FromUnion
+
self.table_name = :batched_background_migration_jobs
+ MAX_ATTEMPTS = 3
+ STUCK_JOBS_TIMEOUT = 1.hour.freeze
+
belongs_to :batched_migration, foreign_key: :batched_background_migration_id
+ scope :active, -> { where(status: [:pending, :running]) }
+ scope :stuck, -> { active.where('updated_at <= ?', STUCK_JOBS_TIMEOUT.ago) }
+ scope :retriable, -> {
+ failed_jobs = where(status: :failed).where('attempts < ?', MAX_ATTEMPTS)
+
+ from_union([failed_jobs, self.stuck])
+ }
+
enum status: {
pending: 0,
running: 1,
@@ -15,8 +28,22 @@ module Gitlab
succeeded: 3
}
+ scope :successful_in_execution_order, -> { where.not(finished_at: nil).succeeded.order(:finished_at) }
+
delegate :aborted?, :job_class, :table_name, :column_name, :job_arguments,
to: :batched_migration, prefix: :migration
+
+ attribute :pause_ms, :integer, default: 100
+
+ def time_efficiency
+ return unless succeeded?
+ return unless finished_at && started_at
+
+ duration = finished_at - started_at
+
+ # TODO: Switch to individual job interval (prereq: https://gitlab.com/gitlab-org/gitlab/-/issues/328801)
+ duration.to_f / batched_migration.interval
+ end
end
end
end
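
The time_efficiency calculation above, worked through with illustrative
numbers: a job that ran for 108 seconds against a 120-second scheduling
interval used 90% of its slot.

    started_at  = Time.now
    finished_at = started_at + 108 # seconds
    interval    = 120

    duration = finished_at - started_at
    duration.to_f / interval # => 0.9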
diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb
index 4aa33ed7946..e85162f355e 100644
--- a/lib/gitlab/database/background_migration/batched_migration.rb
+++ b/lib/gitlab/database/background_migration/batched_migration.rb
@@ -20,9 +20,12 @@ module Gitlab
paused: 0,
active: 1,
aborted: 2,
- finished: 3
+ finished: 3,
+ failed: 4
}
+ attribute :pause_ms, :integer, default: 100
+
def self.active_migration
active.queue_order.first
end
@@ -35,7 +38,13 @@ module Gitlab
end
def create_batched_job!(min, max)
- batched_jobs.create!(min_value: min, max_value: max, batch_size: batch_size, sub_batch_size: sub_batch_size)
+ batched_jobs.create!(
+ min_value: min,
+ max_value: max,
+ batch_size: batch_size,
+ sub_batch_size: sub_batch_size,
+ pause_ms: pause_ms
+ )
end
def next_min_value
@@ -58,12 +67,40 @@ module Gitlab
write_attribute(:batch_class_name, class_name.demodulize)
end
+ def migrated_tuple_count
+ batched_jobs.succeeded.sum(:batch_size)
+ end
+
def prometheus_labels
@prometheus_labels ||= {
migration_id: id,
migration_identifier: "%s/%s.%s" % [job_class_name, table_name, column_name]
}
end
+
+ def smoothed_time_efficiency(number_of_jobs: 10, alpha: 0.2)
+ jobs = batched_jobs.successful_in_execution_order.reverse_order.limit(number_of_jobs)
+
+ return if jobs.size < number_of_jobs
+
+ efficiencies = jobs.map(&:time_efficiency).reject(&:nil?).each_with_index
+
+ dividend = efficiencies.reduce(0) do |total, (job_eff, i)|
+ total + job_eff * (1 - alpha)**i
+ end
+
+ divisor = efficiencies.reduce(0) do |total, (job_eff, i)|
+ total + (1 - alpha)**i
+ end
+
+ return if divisor == 0
+
+ (dividend / divisor).round(2)
+ end
+
+ def optimize!
+ BatchOptimizer.new(self).optimize!
+ end
end
end
end
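
The exponentially weighted average in smoothed_time_efficiency, reduced to a
standalone sketch. Efficiencies are ordered newest first; alpha controls how
quickly older jobs are discounted:

    def smoothed(efficiencies, alpha: 0.2)
      weighted = efficiencies.each_with_index

      dividend = weighted.sum { |eff, i| eff * (1 - alpha)**i }
      divisor  = weighted.sum { |_eff, i| (1 - alpha)**i }

      (dividend / divisor).round(2)
    end

    # The newest job (0.98) carries weight 1.0, the oldest only 0.8**3 = 0.512:
    smoothed([0.98, 0.9, 0.7, 0.5]) # => 0.81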
diff --git a/lib/gitlab/database/background_migration/batched_migration_runner.rb b/lib/gitlab/database/background_migration/batched_migration_runner.rb
index cf8b61f5feb..67fe6c536e6 100644
--- a/lib/gitlab/database/background_migration/batched_migration_runner.rb
+++ b/lib/gitlab/database/background_migration/batched_migration_runner.rb
@@ -19,8 +19,10 @@ module Gitlab
#
# Note that this method is primarily intended to be called by a scheduled worker.
def run_migration_job(active_migration)
- if next_batched_job = create_next_batched_job!(active_migration)
+ if next_batched_job = find_or_create_next_batched_job(active_migration)
migration_wrapper.perform(next_batched_job)
+
+ active_migration.optimize!
else
finish_active_migration(active_migration)
end
@@ -46,12 +48,12 @@ module Gitlab
attr_reader :migration_wrapper
- def create_next_batched_job!(active_migration)
- next_batch_range = find_next_batch_range(active_migration)
-
- return if next_batch_range.nil?
-
- active_migration.create_batched_job!(next_batch_range.min, next_batch_range.max)
+ def find_or_create_next_batched_job(active_migration)
+ if next_batch_range = find_next_batch_range(active_migration)
+ active_migration.create_batched_job!(next_batch_range.min, next_batch_range.max)
+ else
+ active_migration.batched_jobs.retriable.first
+ end
end
def find_next_batch_range(active_migration)
@@ -80,7 +82,13 @@ module Gitlab
end
def finish_active_migration(active_migration)
- active_migration.finished!
+ return if active_migration.batched_jobs.active.exists?
+
+ if active_migration.batched_jobs.failed.exists?
+ active_migration.failed!
+ else
+ active_migration.finished!
+ end
end
end
end
diff --git a/lib/gitlab/database/background_migration/batched_migration_wrapper.rb b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb
index c276f8ce75b..e37df102872 100644
--- a/lib/gitlab/database/background_migration/batched_migration_wrapper.rb
+++ b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb
@@ -19,10 +19,10 @@ module Gitlab
execute_batch(batch_tracking_record)
batch_tracking_record.status = :succeeded
- rescue => e
+ rescue Exception # rubocop:disable Lint/RescueException
batch_tracking_record.status = :failed
- raise e
+ raise
ensure
finish_tracking_execution(batch_tracking_record)
track_prometheus_metrics(batch_tracking_record)
@@ -31,7 +31,7 @@ module Gitlab
private
def start_tracking_execution(tracking_record)
- tracking_record.update!(attempts: tracking_record.attempts + 1, status: :running, started_at: Time.current)
+ tracking_record.update!(attempts: tracking_record.attempts + 1, status: :running, started_at: Time.current, finished_at: nil, metrics: {})
end
def execute_batch(tracking_record)
@@ -43,6 +43,7 @@ module Gitlab
tracking_record.migration_table_name,
tracking_record.migration_column_name,
tracking_record.sub_batch_size,
+ tracking_record.pause_ms,
*tracking_record.migration_job_arguments)
if job_instance.respond_to?(:batch_metrics)
@@ -61,11 +62,12 @@ module Gitlab
metric_for(:gauge_batch_size).set(base_labels, tracking_record.batch_size)
metric_for(:gauge_sub_batch_size).set(base_labels, tracking_record.sub_batch_size)
+ metric_for(:gauge_interval).set(base_labels, tracking_record.batched_migration.interval)
+ metric_for(:gauge_job_duration).set(base_labels, (tracking_record.finished_at - tracking_record.started_at).to_i)
metric_for(:counter_updated_tuples).increment(base_labels, tracking_record.batch_size)
-
- # Time efficiency: Ratio of duration to interval (ideal: less than, but close to 1)
- efficiency = (tracking_record.finished_at - tracking_record.started_at).to_i / migration.interval.to_f
- metric_for(:histogram_time_efficiency).observe(base_labels, efficiency)
+ metric_for(:gauge_migrated_tuples).set(base_labels, tracking_record.batched_migration.migrated_tuple_count)
+ metric_for(:gauge_total_tuple_count).set(base_labels, tracking_record.batched_migration.total_tuple_count)
+ metric_for(:gauge_last_update_time).set(base_labels, Time.current.to_i)
if metrics = tracking_record.metrics
metrics['timings']&.each do |key, timings|
@@ -94,21 +96,35 @@ module Gitlab
:batched_migration_job_sub_batch_size,
'Sub-batch size for a batched migration job'
),
+ gauge_interval: Gitlab::Metrics.gauge(
+ :batched_migration_job_interval_seconds,
+ 'Interval for a batched migration job'
+ ),
+ gauge_job_duration: Gitlab::Metrics.gauge(
+ :batched_migration_job_duration_seconds,
+ 'Duration for a batched migration job'
+ ),
counter_updated_tuples: Gitlab::Metrics.counter(
:batched_migration_job_updated_tuples_total,
'Number of tuples updated by batched migration job'
),
+ gauge_migrated_tuples: Gitlab::Metrics.gauge(
+ :batched_migration_migrated_tuples_total,
+ 'Total number of tuples migrated by a batched migration'
+ ),
histogram_timings: Gitlab::Metrics.histogram(
- :batched_migration_job_duration_seconds,
- 'Timings for a batched migration job',
+ :batched_migration_job_query_duration_seconds,
+ 'Query timings for a batched migration job',
{},
[0.1, 0.25, 0.5, 1, 5].freeze
),
- histogram_time_efficiency: Gitlab::Metrics.histogram(
- :batched_migration_job_time_efficiency,
- 'Ratio of job duration to interval',
- {},
- [0.5, 0.9, 1, 1.5, 2].freeze
+ gauge_total_tuple_count: Gitlab::Metrics.gauge(
+ :batched_migration_total_tuple_count,
+ 'Total tuple count the migration needs to touch'
+ ),
+ gauge_last_update_time: Gitlab::Metrics.gauge(
+ :batched_migration_last_update_time_seconds,
+ 'Unix epoch time in seconds'
)
}
end
diff --git a/lib/gitlab/database/background_migration_job.rb b/lib/gitlab/database/background_migration_job.rb
index 1b9d7cbc9a1..1121793917b 100644
--- a/lib/gitlab/database/background_migration_job.rb
+++ b/lib/gitlab/database/background_migration_job.rb
@@ -9,7 +9,7 @@ module Gitlab
scope :for_migration_class, -> (class_name) { where(class_name: normalize_class_name(class_name)) }
scope :for_migration_execution, -> (class_name, arguments) do
- for_migration_class(class_name).where('arguments = ?', arguments.to_json)
+ for_migration_class(class_name).where('arguments = ?', arguments.to_json) # rubocop:disable Rails/WhereEquals
end
scope :for_partitioning_migration, -> (class_name, table_name) do
diff --git a/lib/gitlab/database/consistency.rb b/lib/gitlab/database/consistency.rb
index b7d06a26ddb..e99ea7a3232 100644
--- a/lib/gitlab/database/consistency.rb
+++ b/lib/gitlab/database/consistency.rb
@@ -28,4 +28,4 @@ module Gitlab
end
end
-::Gitlab::Database::Consistency.singleton_class.prepend_if_ee('EE::Gitlab::Database::Consistency')
+::Gitlab::Database::Consistency.singleton_class.prepend_mod_with('Gitlab::Database::Consistency')
diff --git a/lib/gitlab/database/loose_index_scan_distinct_count.rb b/lib/gitlab/database/loose_index_scan_distinct_count.rb
index 884f4d47ff8..26be07f91c4 100644
--- a/lib/gitlab/database/loose_index_scan_distinct_count.rb
+++ b/lib/gitlab/database/loose_index_scan_distinct_count.rb
@@ -11,7 +11,7 @@ module Gitlab
# This query will read each element in the index matching the project_id filter.
# If a project_id has 100_000 issues, all 100_000 elements will be read.
#
- # A loose index scan will read only one entry from the index for each project_id to reduce the number of disk reads.
+ # A loose index scan will only read one entry from the index for each project_id to reduce the number of disk reads.
#
# Usage:
#
@@ -94,7 +94,7 @@ module Gitlab
elsif column.is_a?(Arel::Attributes::Attribute)
column
else
- raise ColumnConfigurationError.new("Cannot transform the column: #{column.inspect}, please provide the column name as string")
+ raise ColumnConfigurationError, "Cannot transform the column: #{column.inspect}, please provide the column name as string"
end
end
end
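
For reference, the loose index scan technique itself: a recursive CTE that
hops from one distinct value to the next through the index instead of
scanning every row. A minimal sketch, assuming an index on
issues(project_id):

    sql = <<~SQL
      WITH RECURSIVE distinct_values AS (
        (SELECT project_id FROM issues ORDER BY project_id LIMIT 1)
        UNION ALL
        SELECT (
          SELECT project_id
          FROM issues
          WHERE project_id > distinct_values.project_id
          ORDER BY project_id
          LIMIT 1
        )
        FROM distinct_values
        WHERE project_id IS NOT NULL
      )
      SELECT COUNT(project_id) FROM distinct_values
    SQL

    ActiveRecord::Base.connection.select_value(sql) # distinct project_id count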
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index d06a73da8ac..3a94e109d2a 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -5,6 +5,7 @@ module Gitlab
module MigrationHelpers
include Migrations::BackgroundMigrationHelpers
include DynamicModelHelpers
+ include Migrations::RenameTableHelpers
# https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
MAX_IDENTIFIER_NAME_LENGTH = 63
@@ -51,7 +52,7 @@ module Gitlab
allow_null: options[:null]
)
else
- add_column(table_name, column_name, :datetime_with_timezone, options)
+ add_column(table_name, column_name, :datetime_with_timezone, **options)
end
end
end
@@ -143,13 +144,13 @@ module Gitlab
options = options.merge({ algorithm: :concurrently })
- if index_exists?(table_name, column_name, options)
+ if index_exists?(table_name, column_name, **options)
Gitlab::AppLogger.warn "Index not created because it already exists (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}"
return
end
disable_statement_timeout do
- add_index(table_name, column_name, options)
+ add_index(table_name, column_name, **options)
end
end
@@ -169,13 +170,13 @@ module Gitlab
options = options.merge({ algorithm: :concurrently })
- unless index_exists?(table_name, column_name, options)
+ unless index_exists?(table_name, column_name, **options)
Gitlab::AppLogger.warn "Index not removed because it does not exist (this may be due to an aborted migration or similar): table_name: #{table_name}, column_name: #{column_name}"
return
end
disable_statement_timeout do
- remove_index(table_name, options.merge({ column: column_name }))
+ remove_index(table_name, **options.merge({ column: column_name }))
end
end
@@ -205,7 +206,7 @@ module Gitlab
end
disable_statement_timeout do
- remove_index(table_name, options.merge({ name: index_name }))
+ remove_index(table_name, **options.merge({ name: index_name }))
end
end
@@ -565,7 +566,7 @@ module Gitlab
check_trigger_permissions!(table)
- remove_rename_triggers_for_postgresql(table, trigger_name)
+ remove_rename_triggers(table, trigger_name)
remove_column(table, new)
end
@@ -576,8 +577,19 @@ module Gitlab
# table - The name of the table to install the trigger in.
# old_column - The name of the old column.
# new_column - The name of the new column.
- def install_rename_triggers(table, old_column, new_column)
- install_rename_triggers_for_postgresql(table, old_column, new_column)
+ # trigger_name - The name of the trigger to use (optional).
+ def install_rename_triggers(table, old, new, trigger_name: nil)
+ Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).create(old, new, trigger_name: trigger_name)
+ end
+
+ # Removes the triggers used for renaming a column concurrently.
+ def remove_rename_triggers(table, trigger)
+ Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).drop(trigger)
+ end
+
+ # Returns the (base) name to use for triggers when renaming columns.
+ def rename_trigger_name(table, old, new)
+ Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).name(old, new)
end
# Changes the type of a column concurrently.
@@ -663,7 +675,7 @@ module Gitlab
install_rename_triggers(table, column, temp_column)
end
- rescue
+ rescue StandardError
# create_column_from can not run inside a transaction, which means
# that there is a risk that if any of the operations that follow it
# fail, we'll be left with an inconsistent schema
@@ -690,7 +702,7 @@ module Gitlab
check_trigger_permissions!(table)
- remove_rename_triggers_for_postgresql(table, trigger_name)
+ remove_rename_triggers(table, trigger_name)
remove_column(table, old)
end
@@ -905,7 +917,11 @@ module Gitlab
end
end
- # Initializes the conversion of an integer column to bigint
+ def convert_to_bigint_column(column)
+ "#{column}_convert_to_bigint"
+ end
+
+ # Initializes the conversion of a set of integer columns to bigint
#
# It can be used for converting both a Primary Key and any Foreign Keys
# that may reference it or any other integer column that we may want to
@@ -923,14 +939,14 @@ module Gitlab
# Note: this helper is intended to be used in a regular (pre-deployment) migration.
#
# This helper is part 1 of a multi-step migration process:
- # 1. initialize_conversion_of_integer_to_bigint to create the new column and database triggers
+ # 1. initialize_conversion_of_integer_to_bigint to create the new columns and database trigger
# 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
# 3. remaining steps TBD, see #288005
#
# table - The name of the database table containing the column
- # column - The name of the column that we want to convert to bigint.
+ # columns - The name, or array of names, of the column(s) that we want to convert to bigint.
# primary_key - The name of the primary key column (most often :id)
- def initialize_conversion_of_integer_to_bigint(table, column, primary_key: :id)
+ def initialize_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
unless table_exists?(table)
raise "Table #{table} does not exist"
end
@@ -939,34 +955,54 @@ module Gitlab
raise "Column #{primary_key} does not exist on #{table}"
end
- unless column_exists?(table, column)
- raise "Column #{column} does not exist on #{table}"
+ columns = Array.wrap(columns)
+ columns.each do |column|
+ next if column_exists?(table, column)
+
+ raise ArgumentError, "Column #{column} does not exist on #{table}"
end
check_trigger_permissions!(table)
- old_column = column_for(table, column)
- tmp_column = "#{column}_convert_to_bigint"
+ conversions = columns.to_h { |column| [column, convert_to_bigint_column(column)] }
with_lock_retries do
- if (column.to_s == primary_key.to_s) || !old_column.null
- # If the column to be converted is either a PK or is defined as NOT NULL,
- # set it to `NOT NULL DEFAULT 0` and we'll copy the correct values below
- # That way, we skip the expensive validation step required to add
- # a NOT NULL constraint at the end of the process
- add_column(table, tmp_column, :bigint, default: old_column.default || 0, null: false)
- else
- add_column(table, tmp_column, :bigint, default: old_column.default)
+ conversions.each do |(source_column, temporary_name)|
+ column = column_for(table, source_column)
+
+ if (column.name.to_s == primary_key.to_s) || !column.null
+ # If the column to be converted is either a PK or is defined as NOT NULL,
+ # set it to `NOT NULL DEFAULT 0` and we'll copy the correct values below
+ # That way, we skip the expensive validation step required to add
+ # a NOT NULL constraint at the end of the process
+ add_column(table, temporary_name, :bigint, default: column.default || 0, null: false)
+ else
+ add_column(table, temporary_name, :bigint, default: column.default)
+ end
end
- install_rename_triggers(table, column, tmp_column)
+ install_rename_triggers(table, conversions.keys, conversions.values)
end
end
- # Backfills the new column used in the conversion of an integer column to bigint using background migrations.
+ # Reverts `initialize_conversion_of_integer_to_bigint`
+ #
+ # table - The name of the database table containing the columns
+ # columns - The name, or array of names, of the column(s) that we're converting to bigint.
+ def revert_initialize_conversion_of_integer_to_bigint(table, columns)
+ columns = Array.wrap(columns)
+ temporary_columns = columns.map { |column| convert_to_bigint_column(column) }
+
+ trigger_name = rename_trigger_name(table, columns, temporary_columns)
+ remove_rename_triggers(table, trigger_name)
+
+ temporary_columns.each { |column| remove_column(table, column) }
+ end
+
+ # Backfills the new columns used in an integer-to-bigint conversion using background migrations.
#
# - This helper should be called from a post-deployment migration.
- # - In order for this helper to work properly, the new column must be first initialized with
+ # - In order for this helper to work properly, the new columns must be first initialized with
# the `initialize_conversion_of_integer_to_bigint` helper.
# - It tracks the scheduled background jobs through Gitlab::Database::BackgroundMigration::BatchedMigration,
# which allows a more thorough check that all jobs succeeded in the
@@ -976,12 +1012,12 @@ module Gitlab
# deployed (including background job changes) before we begin processing the background migration.
#
# This helper is part 2 of a multi-step migration process:
- # 1. initialize_conversion_of_integer_to_bigint to create the new column and database triggers
+ # 1. initialize_conversion_of_integer_to_bigint to create the new columns and database trigger
# 2. backfill_conversion_of_integer_to_bigint to copy historic data using background migrations
# 3. remaining steps TBD, see #288005
#
# table - The name of the database table containing the column
- # column - The name of the column that we want to convert to bigint.
+ # columns - The name, or an array of names, of the column(s) we want to convert to bigint.
# primary_key - The name of the primary key column (most often :id)
# batch_size - The number of rows to schedule in a single background migration
# sub_batch_size - The smaller batches that will be used by each scheduled job
@@ -1001,7 +1037,7 @@ module Gitlab
# between the scheduled jobs
def backfill_conversion_of_integer_to_bigint(
table,
- column,
+ columns,
primary_key: :id,
batch_size: 20_000,
sub_batch_size: 1000,
@@ -1016,46 +1052,43 @@ module Gitlab
raise "Column #{primary_key} does not exist on #{table}"
end
- unless column_exists?(table, column)
- raise "Column #{column} does not exist on #{table}"
- end
+ conversions = Array.wrap(columns).to_h do |column|
+ raise ArgumentError, "Column #{column} does not exist on #{table}" unless column_exists?(table, column)
- tmp_column = "#{column}_convert_to_bigint"
+ temporary_name = convert_to_bigint_column(column)
+ raise ArgumentError, "Column #{temporary_name} does not exist on #{table}" unless column_exists?(table, temporary_name)
- unless column_exists?(table, tmp_column)
- raise 'The temporary column does not exist, initialize it with `initialize_conversion_of_integer_to_bigint`'
+ [column, temporary_name]
end
- batched_migration = queue_batched_background_migration(
+ queue_batched_background_migration(
'CopyColumnUsingBackgroundMigrationJob',
table,
primary_key,
- column,
- tmp_column,
+ conversions.keys,
+ conversions.values,
job_interval: interval,
batch_size: batch_size,
sub_batch_size: sub_batch_size)
-
- if perform_background_migration_inline?
- # To ensure the schema is up to date immediately we perform the
- # migration inline in dev / test environments.
- Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.new.run_entire_migration(batched_migration)
- end
end
- # Performs a concurrent column rename when using PostgreSQL.
- def install_rename_triggers_for_postgresql(table, old, new, trigger_name: nil)
- Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).create(old, new, trigger_name: trigger_name)
- end
+ # Reverts `backfill_conversion_of_integer_to_bigint`
+ #
+ # table - The name of the database table containing the column
+ # columns - The name, or an array of names, of the column(s) we want to convert to bigint.
+ # primary_key - The name of the primary key column (most often :id)
+ def revert_backfill_conversion_of_integer_to_bigint(table, columns, primary_key: :id)
+ columns = Array.wrap(columns)
- # Removes the triggers used for renaming a PostgreSQL column concurrently.
- def remove_rename_triggers_for_postgresql(table, trigger)
- Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).drop(trigger)
- end
+ conditions = ActiveRecord::Base.sanitize_sql([
+ 'job_class_name = :job_class_name AND table_name = :table_name AND column_name = :column_name AND job_arguments = :job_arguments',
+ job_class_name: 'CopyColumnUsingBackgroundMigrationJob',
+ table_name: table,
+ column_name: primary_key,
+ job_arguments: [columns, columns.map { |column| convert_to_bigint_column(column) }].to_json
+ ])
- # Returns the (base) name to use for triggers when renaming columns.
- def rename_trigger_name(table, old, new)
- Gitlab::Database::UnidirectionalCopyTrigger.on_table(table).name(old, new)
+ execute("DELETE FROM batched_background_migrations WHERE #{conditions}")
end
# Returns an Array containing the indexes for the given column
@@ -1162,8 +1195,8 @@ module Gitlab
end
end
- def remove_foreign_key_without_error(*args)
- remove_foreign_key(*args)
+ def remove_foreign_key_without_error(*args, **kwargs)
+ remove_foreign_key(*args, **kwargs)
rescue ArgumentError
end
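
A hypothetical migration pair exercising the reworked multi-column
conversion helpers; the table and column names are illustrative, not taken
from this commit:

    class InitializeConversionOfEventsToBigint < ActiveRecord::Migration[6.0]
      include Gitlab::Database::MigrationHelpers

      def up
        initialize_conversion_of_integer_to_bigint(:events, %i[id target_id])
      end

      def down
        revert_initialize_conversion_of_integer_to_bigint(:events, %i[id target_id])
      end
    end

    # Later, in a post-deployment migration:
    #   backfill_conversion_of_integer_to_bigint(:events, %i[id target_id],
    #     batch_size: 20_000, sub_batch_size: 1_000)
    # and, to roll the backfill back:
    #   revert_backfill_conversion_of_integer_to_bigint(:events, %i[id target_id])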
diff --git a/lib/gitlab/database/migration_helpers/cascading_namespace_settings.rb b/lib/gitlab/database/migration_helpers/cascading_namespace_settings.rb
new file mode 100644
index 00000000000..eecf96acb30
--- /dev/null
+++ b/lib/gitlab/database/migration_helpers/cascading_namespace_settings.rb
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module MigrationHelpers
+ module CascadingNamespaceSettings
+ include Gitlab::Database::MigrationHelpers
+
+ # Creates the four required columns that constitute a single cascading
+ # namespace settings attribute. This helper is only appropriate if the
+ # setting is not already present as a non-cascading attribute.
+ #
+ # Creates the `setting_name` column along with the `lock_setting_name`
+ # column in both `namespace_settings` and `application_settings`.
+ #
+ # This helper is not automatically reversible; use it in an `up` method, paired
+ # with `remove_cascading_namespace_setting` in the corresponding `down`.
+ #
+ # setting_name - The name of the cascading attribute - same as defined
+ # in `NamespaceSetting` with the `cascading_attr` method.
+ # type - The column type for the setting itself (:boolean, :integer, etc.)
+ # options - Standard Rails column options hash. Accepts keys such as
+ # `null` and `default`.
+ #
+ # `null` and `default` options will only be applied to the `application_settings`
+ # column. In most cases, a non-null default value should be specified.
+ def add_cascading_namespace_setting(setting_name, type, **options)
+ lock_column_name = "lock_#{setting_name}".to_sym
+
+ check_cascading_namespace_setting_consistency(setting_name, lock_column_name)
+
+ namespace_options = options.merge(null: true, default: nil)
+
+ with_lock_retries do
+ add_column(:namespace_settings, setting_name, type, namespace_options)
+ add_column(:namespace_settings, lock_column_name, :boolean, default: false, null: false)
+ end
+
+ add_column(:application_settings, setting_name, type, options)
+ add_column(:application_settings, lock_column_name, :boolean, default: false, null: false)
+ end
+
+ def remove_cascading_namespace_setting(setting_name)
+ lock_column_name = "lock_#{setting_name}".to_sym
+
+ with_lock_retries do
+ remove_column(:namespace_settings, setting_name) if column_exists?(:namespace_settings, setting_name)
+ remove_column(:namespace_settings, lock_column_name) if column_exists?(:namespace_settings, lock_column_name)
+ end
+
+ remove_column(:application_settings, setting_name) if column_exists?(:application_settings, setting_name)
+ remove_column(:application_settings, lock_column_name) if column_exists?(:application_settings, lock_column_name)
+ end
+
+ private
+
+ def check_cascading_namespace_setting_consistency(setting_name, lock_name)
+ existing_columns = []
+
+ %w(namespace_settings application_settings).each do |table|
+ existing_columns << "#{table}.#{setting_name}" if column_exists?(table.to_sym, setting_name)
+ existing_columns << "#{table}.#{lock_name}" if column_exists?(table.to_sym, lock_name)
+ end
+
+ return if existing_columns.empty?
+
+ raise <<~ERROR
+ One or more cascading namespace columns already exist. The `add_cascading_namespace_setting`
+ helper can only be used for new settings, when none of the required columns already exist.
+ Existing columns: #{existing_columns.join(', ')}
+ ERROR
+ end
+ end
+ end
+ end
+end
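
A hypothetical up/down pair using the new helper; the setting name is
illustrative:

    class AddDelayedProjectRemovalCascadingSetting < ActiveRecord::Migration[6.0]
      include Gitlab::Database::MigrationHelpers::CascadingNamespaceSettings

      def up
        # Adds delayed_project_removal and lock_delayed_project_removal to
        # both namespace_settings and application_settings.
        add_cascading_namespace_setting(:delayed_project_removal, :boolean,
          default: false, null: false)
      end

      def down
        remove_cascading_namespace_setting(:delayed_project_removal)
      end
    end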
diff --git a/lib/gitlab/database/migrations/instrumentation.rb b/lib/gitlab/database/migrations/instrumentation.rb
index 959028ce00b..e9ef80d5198 100644
--- a/lib/gitlab/database/migrations/instrumentation.rb
+++ b/lib/gitlab/database/migrations/instrumentation.rb
@@ -4,6 +4,9 @@ module Gitlab
module Database
module Migrations
class Instrumentation
+ RESULT_DIR = Rails.root.join('tmp', 'migration-testing').freeze
+ STATS_FILENAME = 'migration-stats.json'
+
attr_reader :observations
def initialize(observers = ::Gitlab::Database::Migrations::Observers.all_observers)
@@ -21,7 +24,7 @@ module Gitlab
observation.walltime = Benchmark.realtime do
yield
- rescue => e
+ rescue StandardError => e
exception = e
observation.success = false
end
@@ -47,7 +50,7 @@ module Gitlab
def on_each_observer(&block)
observers.each do |observer|
yield observer
- rescue => e
+ rescue StandardError => e
Gitlab::AppLogger.error("Migration observer #{observer.class} failed with: #{e}")
end
end
diff --git a/lib/gitlab/database/migrations/observers.rb b/lib/gitlab/database/migrations/observers.rb
index 592993aeac5..b65a303ef30 100644
--- a/lib/gitlab/database/migrations/observers.rb
+++ b/lib/gitlab/database/migrations/observers.rb
@@ -7,7 +7,8 @@ module Gitlab
def self.all_observers
[
TotalDatabaseSizeChange.new,
- QueryStatistics.new
+ QueryStatistics.new,
+ QueryLog.new
]
end
end
diff --git a/lib/gitlab/database/migrations/observers/query_log.rb b/lib/gitlab/database/migrations/observers/query_log.rb
new file mode 100644
index 00000000000..45df07fe391
--- /dev/null
+++ b/lib/gitlab/database/migrations/observers/query_log.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Migrations
+ module Observers
+ class QueryLog < MigrationObserver
+ def before
+ @logger_was = ActiveRecord::Base.logger
+ @log_file_path = File.join(Instrumentation::RESULT_DIR, 'current.log')
+ @logger = Logger.new(@log_file_path)
+ ActiveRecord::Base.logger = @logger
+ end
+
+ def after
+ ActiveRecord::Base.logger = @logger_was
+ @logger.close
+ end
+
+ def record(observation)
+ File.rename(@log_file_path, File.join(Instrumentation::RESULT_DIR, "#{observation.migration}.log"))
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/partitioning/partition_creator.rb b/lib/gitlab/database/partitioning/partition_creator.rb
index 547e0b9b957..d4b2b8d50e2 100644
--- a/lib/gitlab/database/partitioning/partition_creator.rb
+++ b/lib/gitlab/database/partitioning/partition_creator.rb
@@ -38,7 +38,7 @@ module Gitlab
create(model, partitions_to_create)
end
- rescue => e
+ rescue StandardError => e
Gitlab::AppLogger.error("Failed to create partition(s) for #{model.table_name}: #{e.class}: #{e.message}")
end
end
diff --git a/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
index 0bc1343acca..c0cc97de276 100644
--- a/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
+++ b/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
@@ -40,7 +40,7 @@ module Gitlab
end
with_lock_retries do
- add_index(table_name, column_names, options)
+ add_index(table_name, column_names, **options)
end
end
diff --git a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
index e8b49c7f62c..aa46b98be5d 100644
--- a/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
+++ b/lib/gitlab/database/postgres_hll/batch_distinct_counter.rb
@@ -69,10 +69,8 @@ module Gitlab
hll_buckets = Buckets.new
while batch_start <= finish
- begin
- hll_buckets.merge_hash!(hll_buckets_for_batch(batch_start, batch_start + batch_size))
- batch_start += batch_size
- end
+ hll_buckets.merge_hash!(hll_buckets_for_batch(batch_start, batch_start + batch_size))
+ batch_start += batch_size
sleep(SLEEP_TIME_IN_SECONDS)
end
diff --git a/lib/gitlab/database/reindexing/concurrent_reindex.rb b/lib/gitlab/database/reindexing/concurrent_reindex.rb
index a6fe7d61a4f..7e2dd55d21b 100644
--- a/lib/gitlab/database/reindexing/concurrent_reindex.rb
+++ b/lib/gitlab/database/reindexing/concurrent_reindex.rb
@@ -11,7 +11,14 @@ module Gitlab
PG_IDENTIFIER_LENGTH = 63
TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
REPLACED_INDEX_PREFIX = 'old_reindex_'
- STATEMENT_TIMEOUT = 6.hours
+ STATEMENT_TIMEOUT = 9.hours
+
+ # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
+ # which only conflicts with DDL and vacuum. We therefore execute this with a rather
+ # high lock timeout and a long pause in between retries. This is an alternative to
+ # setting a high statement timeout, which would lead to a long running query with effects
+ # on e.g. vacuum.
+ REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
attr_reader :index, :logger
@@ -70,7 +77,7 @@ module Gitlab
ensure
begin
remove_index(index.schema, replacement_index_name)
- rescue => e
+ rescue StandardError => e
logger.error(e)
end
end
@@ -95,7 +102,13 @@ module Gitlab
def remove_index(schema, name)
logger.info("Removing index #{schema}.#{name}")
- set_statement_timeout do
+ retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
+ timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
+ klass: self.class,
+ logger: logger
+ )
+
+ retries.run(raise_on_exhaustion: false) do
connection.execute(<<~SQL)
DROP INDEX CONCURRENTLY
IF EXISTS #{quote_table_name(schema)}.#{quote_table_name(name)}
@@ -121,7 +134,6 @@ module Gitlab
def with_lock_retries(&block)
arguments = { klass: self.class, logger: logger }
-
Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
end
diff --git a/lib/gitlab/database/reindexing/coordinator.rb b/lib/gitlab/database/reindexing/coordinator.rb
index 7a7d17ca196..d68f47b5b6c 100644
--- a/lib/gitlab/database/reindexing/coordinator.rb
+++ b/lib/gitlab/database/reindexing/coordinator.rb
@@ -42,7 +42,7 @@ module Gitlab
def perform_for(index, action)
ConcurrentReindex.new(index).perform
- rescue
+ rescue StandardError
action.state = :failed
raise
diff --git a/lib/gitlab/database/reindexing/grafana_notifier.rb b/lib/gitlab/database/reindexing/grafana_notifier.rb
index b1e5ecb9ade..f4ea59deb50 100644
--- a/lib/gitlab/database/reindexing/grafana_notifier.rb
+++ b/lib/gitlab/database/reindexing/grafana_notifier.rb
@@ -53,7 +53,7 @@ module Gitlab
log_error("Response code #{response.code}") unless success
success
- rescue => err
+ rescue StandardError => err
log_error(err)
false
diff --git a/lib/gitlab/database/rename_table_helpers.rb b/lib/gitlab/database/rename_table_helpers.rb
new file mode 100644
index 00000000000..7f5af038c6d
--- /dev/null
+++ b/lib/gitlab/database/rename_table_helpers.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module RenameTableHelpers
+ def rename_table_safely(old_table_name, new_table_name)
+ with_lock_retries do
+ rename_table(old_table_name, new_table_name)
+ execute("CREATE VIEW #{old_table_name} AS SELECT * FROM #{new_table_name}")
+ end
+ end
+
+ def undo_rename_table_safely(old_table_name, new_table_name)
+ with_lock_retries do
+ execute("DROP VIEW IF EXISTS #{old_table_name}")
+ rename_table(new_table_name, old_table_name)
+ end
+ end
+
+ def finalize_table_rename(old_table_name, new_table_name)
+ with_lock_retries do
+ execute("DROP VIEW IF EXISTS #{old_table_name}")
+ end
+ end
+
+ def undo_finalize_table_rename(old_table_name, new_table_name)
+ with_lock_retries do
+ execute("CREATE VIEW #{old_table_name} AS SELECT * FROM #{new_table_name}")
+ end
+ end
+ end
+ end
+end
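
A sketch of the intended two-release flow (table names illustrative): the
rename ships in one release while a compatibility view keeps the old name
queryable, and the view is dropped in a later release once nothing
references it:

    # Release N - regular migration:
    def up
      rename_table_safely(:services, :integrations)
    end

    def down
      undo_rename_table_safely(:services, :integrations)
    end

    # Release N+1 - post-deployment migration:
    def up
      finalize_table_rename(:services, :integrations)
    end

    def down
      undo_finalize_table_rename(:services, :integrations)
    end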
diff --git a/lib/gitlab/database/schema_cache_with_renamed_table.rb b/lib/gitlab/database/schema_cache_with_renamed_table.rb
new file mode 100644
index 00000000000..28123edd708
--- /dev/null
+++ b/lib/gitlab/database/schema_cache_with_renamed_table.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module SchemaCacheWithRenamedTable
+ # Override methods in ActiveRecord::ConnectionAdapters::SchemaCache
+
+ def clear!
+ super
+
+ clear_renamed_tables_cache!
+ end
+
+ def clear_data_source_cache!(name)
+ super(name)
+
+ clear_renamed_tables_cache!
+ end
+
+ def primary_keys(table_name)
+ super(underlying_table(table_name))
+ end
+
+ def columns(table_name)
+ super(underlying_table(table_name))
+ end
+
+ def columns_hash(table_name)
+ super(underlying_table(table_name))
+ end
+
+ def indexes(table_name)
+ super(underlying_table(table_name))
+ end
+
+ private
+
+ def underlying_table(table_name)
+ renamed_tables_cache.fetch(table_name, table_name)
+ end
+
+ def renamed_tables_cache
+ @renamed_tables ||= begin
+ Gitlab::Database::TABLES_TO_BE_RENAMED.select do |old_name, new_name|
+ ActiveRecord::Base.connection.view_exists?(old_name)
+ end
+ end
+ end
+
+ def clear_renamed_tables_cache!
+ @renamed_tables = nil # rubocop:disable Gitlab/ModuleWithInstanceVariables
+ end
+ end
+ end
+end
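
The intended effect, sketched with illustrative names: while a rename is in
flight, the old name resolves to a view, so the schema cache answers
metadata lookups from the underlying table (a view carries no primary-key or
index metadata of its own):

    # Assuming TABLES_TO_BE_RENAMED maps 'services' => 'integrations' and the
    # 'services' compatibility view exists:
    cache = ActiveRecord::Base.connection.schema_cache
    cache.primary_keys('services') # looked up against 'integrations'
    cache.columns('services')      # likewise served from the real table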
diff --git a/lib/gitlab/database/with_lock_retries.rb b/lib/gitlab/database/with_lock_retries.rb
index 3fb52d786ad..bbf8f133f0f 100644
--- a/lib/gitlab/database/with_lock_retries.rb
+++ b/lib/gitlab/database/with_lock_retries.rb
@@ -92,7 +92,7 @@ module Gitlab
end
begin
- run_block_with_transaction
+ run_block_with_lock_timeout
rescue ActiveRecord::LockWaitTimeout
if retry_with_lock_timeout?
disable_idle_in_transaction_timeout if ActiveRecord::Base.connection.transaction_open?
@@ -121,7 +121,7 @@ module Gitlab
block.call
end
- def run_block_with_transaction
+ def run_block_with_lock_timeout
ActiveRecord::Base.transaction(requires_new: true) do
execute("SET LOCAL lock_timeout TO '#{current_lock_timeout_in_ms}ms'")
diff --git a/lib/gitlab/database/with_lock_retries_outside_transaction.rb b/lib/gitlab/database/with_lock_retries_outside_transaction.rb
new file mode 100644
index 00000000000..175cc493e36
--- /dev/null
+++ b/lib/gitlab/database/with_lock_retries_outside_transaction.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ # This retry mechanism behaves similarly to WithLockRetries
+ # except it does not wrap itself into a transaction scope.
+ #
+ # In our context, this is only useful if directly connected to
+ # PostgreSQL. When going through pgbouncer, this method **won't work**
+ # because it relies on `SET` statements outside transactions, which
+ # pgbouncer may multiplex across different backend connections.
+ class WithLockRetriesOutsideTransaction < WithLockRetries
+ private
+
+ def run_block_with_lock_timeout
+ execute("SET lock_timeout TO '#{current_lock_timeout_in_ms}ms'")
+
+ log(message: 'Lock timeout is set', current_iteration: current_iteration, lock_timeout_in_ms: current_lock_timeout_in_ms)
+
+ run_block
+
+ log(message: 'Migration finished', current_iteration: current_iteration, lock_timeout_in_ms: current_lock_timeout_in_ms)
+ end
+
+ def run_block_without_lock_timeout
+ log(message: "Couldn't acquire lock to perform the migration", current_iteration: current_iteration)
+ log(message: "Executing without lock timeout", current_iteration: current_iteration)
+
+ disable_lock_timeout
+
+ run_block
+
+ log(message: 'Migration finished', current_iteration: current_iteration)
+ end
+
+ def disable_lock_timeout
+ execute("SET lock_timeout TO '0'")
+ end
+ end
+ end
+end
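
Usage mirrors WithLockRetries; the reindexing change above is the first
caller. A condensed sketch of that call site:

    retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
      timing_configuration: [[1.minute, 9.minutes]] * 30, # [lock_timeout, sleep]
      klass: self.class,
      logger: logger
    )

    # Up to 30 attempts, each with a 1-minute lock_timeout and a 9-minute
    # pause in between; gives up quietly after exhaustion instead of raising.
    retries.run(raise_on_exhaustion: false) do
      connection.execute('DROP INDEX CONCURRENTLY IF EXISTS "old_reindex_example"')
    end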