diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2021-03-16 18:18:33 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2021-03-16 18:18:33 +0000 |
commit | f64a639bcfa1fc2bc89ca7db268f594306edfd7c (patch) | |
tree | a2c3c2ebcc3b45e596949db485d6ed18ffaacfa1 /lib/gitlab/database | |
parent | bfbc3e0d6583ea1a91f627528bedc3d65ba4b10f (diff) | |
download | gitlab-ce-f64a639bcfa1fc2bc89ca7db268f594306edfd7c.tar.gz |
Add latest changes from gitlab-org/gitlab@13-10-stable-ee (tag: v13.10.0-rc40)
Diffstat (limited to 'lib/gitlab/database')
11 files changed, 319 insertions, 11 deletions
diff --git a/lib/gitlab/database/background_migration/batched_job.rb b/lib/gitlab/database/background_migration/batched_job.rb new file mode 100644 index 00000000000..3b624df2bfd --- /dev/null +++ b/lib/gitlab/database/background_migration/batched_job.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module BackgroundMigration + class BatchedJob < ActiveRecord::Base # rubocop:disable Rails/ApplicationRecord + self.table_name = :batched_background_migration_jobs + + belongs_to :batched_migration, foreign_key: :batched_background_migration_id + + enum status: { + pending: 0, + running: 1, + failed: 2, + succeeded: 3 + } + + delegate :aborted?, :job_class, :table_name, :column_name, :job_arguments, + to: :batched_migration, prefix: :migration + end + end + end +end diff --git a/lib/gitlab/database/background_migration/batched_migration.rb b/lib/gitlab/database/background_migration/batched_migration.rb new file mode 100644 index 00000000000..0c9add9b355 --- /dev/null +++ b/lib/gitlab/database/background_migration/batched_migration.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module BackgroundMigration + class BatchedMigration < ActiveRecord::Base # rubocop:disable Rails/ApplicationRecord + JOB_CLASS_MODULE = 'Gitlab::BackgroundMigration' + BATCH_CLASS_MODULE = "#{JOB_CLASS_MODULE}::BatchingStrategies".freeze + + self.table_name = :batched_background_migrations + + has_many :batched_jobs, foreign_key: :batched_background_migration_id + has_one :last_job, -> { order(id: :desc) }, + class_name: 'Gitlab::Database::BackgroundMigration::BatchedJob', + foreign_key: :batched_background_migration_id + + scope :queue_order, -> { order(id: :asc) } + + enum status: { + paused: 0, + active: 1, + aborted: 2, + finished: 3 + } + + def interval_elapsed? + last_job.nil? 
|| last_job.created_at <= Time.current - interval + end + + def create_batched_job!(min, max) + batched_jobs.create!(min_value: min, max_value: max, batch_size: batch_size, sub_batch_size: sub_batch_size) + end + + def next_min_value + last_job&.max_value&.next || min_value + end + + def job_class + "#{JOB_CLASS_MODULE}::#{job_class_name}".constantize + end + + def batch_class + "#{BATCH_CLASS_MODULE}::#{batch_class_name}".constantize + end + + def job_class_name=(class_name) + write_attribute(:job_class_name, class_name.demodulize) + end + + def batch_class_name=(class_name) + write_attribute(:batch_class_name, class_name.demodulize) + end + end + end + end +end diff --git a/lib/gitlab/database/background_migration/batched_migration_wrapper.rb b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb new file mode 100644 index 00000000000..299bd992197 --- /dev/null +++ b/lib/gitlab/database/background_migration/batched_migration_wrapper.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module BackgroundMigration + class BatchedMigrationWrapper + def perform(batch_tracking_record) + start_tracking_execution(batch_tracking_record) + + execute_batch(batch_tracking_record) + + batch_tracking_record.status = :succeeded + rescue => e + batch_tracking_record.status = :failed + + raise e + ensure + finish_tracking_execution(batch_tracking_record) + end + + private + + def start_tracking_execution(tracking_record) + tracking_record.update!(attempts: tracking_record.attempts + 1, status: :running, started_at: Time.current) + end + + def execute_batch(tracking_record) + job_instance = tracking_record.migration_job_class.new + + job_instance.perform( + tracking_record.min_value, + tracking_record.max_value, + tracking_record.migration_table_name, + tracking_record.migration_column_name, + tracking_record.sub_batch_size, + *tracking_record.migration_job_arguments) + end + + def finish_tracking_execution(tracking_record) + 
tracking_record.finished_at = Time.current + tracking_record.save! + end + end + end + end +end diff --git a/lib/gitlab/database/background_migration/scheduler.rb b/lib/gitlab/database/background_migration/scheduler.rb new file mode 100644 index 00000000000..5f8a5ec06a5 --- /dev/null +++ b/lib/gitlab/database/background_migration/scheduler.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module BackgroundMigration + class Scheduler + def perform(migration_wrapper: BatchedMigrationWrapper.new) + active_migration = BatchedMigration.active.queue_order.first + + return unless active_migration&.interval_elapsed? + + if next_batched_job = create_next_batched_job!(active_migration) + migration_wrapper.perform(next_batched_job) + else + finish_active_migration(active_migration) + end + end + + private + + def create_next_batched_job!(active_migration) + next_batch_range = find_next_batch_range(active_migration) + + return if next_batch_range.nil? + + active_migration.create_batched_job!(next_batch_range.min, next_batch_range.max) + end + + def find_next_batch_range(active_migration) + batching_strategy = active_migration.batch_class.new + batch_min_value = active_migration.next_min_value + + next_batch_bounds = batching_strategy.next_batch( + active_migration.table_name, + active_migration.column_name, + batch_min_value: batch_min_value, + batch_size: active_migration.batch_size) + + return if next_batch_bounds.nil? + + clamped_batch_range(active_migration, next_batch_bounds) + end + + def clamped_batch_range(active_migration, next_bounds) + min_value, max_value = next_bounds + + return if min_value > active_migration.max_value + + max_value = max_value.clamp(min_value, active_migration.max_value) + + (min_value..max_value) + end + + def finish_active_migration(active_migration) + active_migration.finished! 
+ end + end + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 6b169a504f3..31e733050e1 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -87,9 +87,10 @@ module Gitlab # See Rails' `create_table` for more info on the available arguments. def create_table_with_constraints(table_name, **options, &block) helper_context = self - check_constraints = [] with_lock_retries do + check_constraints = [] + create_table(table_name, **options) do |t| t.define_singleton_method(:check_constraint) do |name, definition| helper_context.send(:validate_check_constraint_name!, name) # rubocop:disable GitlabSecurity/PublicSend @@ -1015,7 +1016,7 @@ module Gitlab 'CopyColumnUsingBackgroundMigrationJob', interval, batch_size: batch_size, - other_job_arguments: [table, primary_key, column, tmp_column, sub_batch_size], + other_job_arguments: [table, primary_key, sub_batch_size, column, tmp_column], track_jobs: true, primary_column_name: primary_key ) diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb index 12dcf68da2f..e8cbea72887 100644 --- a/lib/gitlab/database/migrations/background_migration_helpers.rb +++ b/lib/gitlab/database/migrations/background_migration_helpers.rb @@ -4,8 +4,12 @@ module Gitlab module Database module Migrations module BackgroundMigrationHelpers - BACKGROUND_MIGRATION_BATCH_SIZE = 1_000 # Number of rows to process per job - BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1_000 # Number of jobs to bulk queue at a time + BATCH_SIZE = 1_000 # Number of rows to process per job + SUB_BATCH_SIZE = 100 # Number of rows to process per sub-batch + JOB_BUFFER_SIZE = 1_000 # Number of jobs to bulk queue at a time + BATCH_CLASS_NAME = 'PrimaryKeyBatchingStrategy' # Default batch class for batched migrations + BATCH_MIN_VALUE = 1 # Default minimum value for batched migrations + 
BATCH_MIN_DELAY = 2.minutes.freeze # Minimum delay between batched migrations # Bulk queues background migration jobs for an entire table, batched by ID range. # "Bulk" meaning many jobs will be pushed at a time for efficiency. @@ -31,7 +35,7 @@ module Gitlab # # do something # end # end - def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE) + def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BATCH_SIZE) raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id') jobs = [] @@ -40,7 +44,7 @@ module Gitlab model_class.each_batch(of: batch_size) do |relation| start_id, end_id = relation.pluck("MIN(#{table_name}.id)", "MAX(#{table_name}.id)").first - if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE + if jobs.length >= JOB_BUFFER_SIZE # Note: This code path generally only helps with many millions of rows # We push multiple jobs at a time to reduce the time spent in # Sidekiq/Redis operations. 
We're using this buffer based approach so we @@ -89,7 +93,7 @@ module Gitlab # # do something # end # end - def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE, other_job_arguments: [], initial_delay: 0, track_jobs: false, primary_column_name: :id) + def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BATCH_SIZE, other_job_arguments: [], initial_delay: 0, track_jobs: false, primary_column_name: :id) raise "#{model_class} does not have an ID column of #{primary_column_name} to use for batch ranges" unless model_class.column_names.include?(primary_column_name.to_s) raise "#{primary_column_name} is not an integer column" unless model_class.columns_hash[primary_column_name.to_s].type == :integer @@ -127,6 +131,79 @@ module Gitlab final_delay end + # Creates a batched background migration for the given table. A batched migration runs one job + # at a time, computing the bounds of the next batch based on the current migration settings and the previous + # batch bounds. Each job's execution status is tracked in the database as the migration runs. The given job + # class must be present in the Gitlab::BackgroundMigration module, and the batch class (if specified) must be + # present in the Gitlab::BackgroundMigration::BatchingStrategies module. 
+ # + # job_class_name - The background migration job class as a string + # batch_table_name - The name of the table the migration will batch over + # batch_column_name - The name of the column the migration will batch over + # job_arguments - Extra arguments to pass to the job instance when the migration runs + # job_interval - The pause interval between each job's execution, minimum of 2 minutes + # batch_min_value - The value in the column the batching will begin at + # batch_max_value - The value in the column the batching will end at, defaults to `SELECT MAX(batch_column)` + # batch_class_name - The name of the class that will be called to find the range of each next batch + # batch_size - The maximum number of rows per job + # sub_batch_size - The maximum number of rows processed per "iteration" within the job + # + # + # *Returns the created BatchedMigration record* + # + # Example: + # + # queue_batched_background_migration( + # 'CopyColumnUsingBackgroundMigrationJob', + # :events, + # :id, + # job_interval: 2.minutes, + # other_job_arguments: ['column1', 'column2']) + # + # Where the the background migration exists: + # + # class Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob + # def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, *other_args) + # # do something + # end + # end + def queue_batched_background_migration( # rubocop:disable Metrics/ParameterLists + job_class_name, + batch_table_name, + batch_column_name, + *job_arguments, + job_interval:, + batch_min_value: BATCH_MIN_VALUE, + batch_max_value: nil, + batch_class_name: BATCH_CLASS_NAME, + batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE + ) + + job_interval = BATCH_MIN_DELAY if job_interval < BATCH_MIN_DELAY + + batch_max_value ||= connection.select_value(<<~SQL) + SELECT MAX(#{connection.quote_column_name(batch_column_name)}) + FROM #{connection.quote_table_name(batch_table_name)} + SQL + + migration_status = batch_max_value.nil? ? 
:finished : :active + batch_max_value ||= batch_min_value + + Gitlab::Database::BackgroundMigration::BatchedMigration.create!( + job_class_name: job_class_name, + table_name: batch_table_name, + column_name: batch_column_name, + interval: job_interval, + min_value: batch_min_value, + max_value: batch_max_value, + batch_class_name: batch_class_name, + batch_size: batch_size, + sub_batch_size: sub_batch_size, + job_arguments: job_arguments, + status: migration_status) + end + def perform_background_migration_inline? Rails.env.test? || Rails.env.development? end diff --git a/lib/gitlab/database/migrations/observation.rb b/lib/gitlab/database/migrations/observation.rb index 518c2c560d2..046843824a4 100644 --- a/lib/gitlab/database/migrations/observation.rb +++ b/lib/gitlab/database/migrations/observation.rb @@ -7,7 +7,8 @@ module Gitlab :migration, :walltime, :success, - :total_database_size_change + :total_database_size_change, + :query_statistics ) end end diff --git a/lib/gitlab/database/migrations/observers.rb b/lib/gitlab/database/migrations/observers.rb index 4b931d3c19c..592993aeac5 100644 --- a/lib/gitlab/database/migrations/observers.rb +++ b/lib/gitlab/database/migrations/observers.rb @@ -6,7 +6,8 @@ module Gitlab module Observers def self.all_observers [ - TotalDatabaseSizeChange.new + TotalDatabaseSizeChange.new, + QueryStatistics.new ] end end diff --git a/lib/gitlab/database/migrations/observers/query_statistics.rb b/lib/gitlab/database/migrations/observers/query_statistics.rb new file mode 100644 index 00000000000..466f4724256 --- /dev/null +++ b/lib/gitlab/database/migrations/observers/query_statistics.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Migrations + module Observers + # This observer gathers statistics from the pg_stat_statements extension. + # Notice that this extension is not installed by default. In case it cannot + # be found, the observer does nothing and doesn't throw an error. 
+ class QueryStatistics < MigrationObserver + include Gitlab::Database::SchemaHelpers + + def before + return unless enabled? + + connection.execute('select pg_stat_statements_reset()') + end + + def record(observation) + return unless enabled? + + observation.query_statistics = connection.execute(<<~SQL) + SELECT query, calls, total_time, max_time, mean_time, rows + FROM pg_stat_statements + ORDER BY total_time DESC + SQL + end + + private + + def enabled? + function_exists?(:pg_stat_statements_reset) && connection.view_exists?(:pg_stat_statements) + end + end + end + end + end +end diff --git a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb index f4cf576dda7..1c289391e21 100644 --- a/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb +++ b/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb @@ -9,7 +9,7 @@ module Gitlab include ::Gitlab::Database::MigrationHelpers include ::Gitlab::Database::Migrations::BackgroundMigrationHelpers - ALLOWED_TABLES = %w[audit_events].freeze + ALLOWED_TABLES = %w[audit_events web_hook_logs].freeze ERROR_SCOPE = 'table partitioning' MIGRATION_CLASS_NAME = "::#{module_parent_name}::BackfillPartitionedTable" diff --git a/lib/gitlab/database/similarity_score.rb b/lib/gitlab/database/similarity_score.rb index ff78fd0218c..40845c0d5e0 100644 --- a/lib/gitlab/database/similarity_score.rb +++ b/lib/gitlab/database/similarity_score.rb @@ -74,9 +74,14 @@ module Gitlab end # (SIMILARITY ...) + (SIMILARITY ...) 
- expressions.inject(first_expression) do |expression1, expression2| + additions = expressions.inject(first_expression) do |expression1, expression2| Arel::Nodes::Addition.new(expression1, expression2) end + + score_as_numeric = Arel::Nodes::NamedFunction.new('CAST', [Arel::Nodes::Grouping.new(additions).as('numeric')]) + + # Rounding the score to two decimals + Arel::Nodes::NamedFunction.new('ROUND', [score_as_numeric, 2]) end def self.order_by_similarity?(arel_query) |