diff options
Diffstat (limited to 'lib/gitlab/background_migration')
31 files changed, 377 insertions, 279 deletions
diff --git a/lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb b/lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb deleted file mode 100644 index 2247747ba08..00000000000 --- a/lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb +++ /dev/null @@ -1,77 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # A job to create ci_namespace_mirrors entries in batches - class BackfillCiNamespaceMirrors - class Namespace < ActiveRecord::Base # rubocop:disable Style/Documentation - include ::EachBatch - - self.table_name = 'namespaces' - self.inheritance_column = nil - - scope :base_query, -> do - select(:id, :parent_id) - end - end - - PAUSE_SECONDS = 0.1 - SUB_BATCH_SIZE = 500 - - def perform(start_id, end_id) - batch_query = Namespace.base_query.where(id: start_id..end_id) - batch_query.each_batch(of: SUB_BATCH_SIZE) do |sub_batch| - first, last = sub_batch.pluck(Arel.sql('MIN(id), MAX(id)')).first - ranged_query = Namespace.unscoped.base_query.where(id: first..last) - - update_sql = <<~SQL - INSERT INTO ci_namespace_mirrors (namespace_id, traversal_ids) - #{insert_values(ranged_query)} - ON CONFLICT (namespace_id) DO NOTHING - SQL - # We do nothing on conflict because we consider they were already filled. - - Namespace.connection.execute(update_sql) - - sleep PAUSE_SECONDS - end - - mark_job_as_succeeded(start_id, end_id) - end - - private - - def insert_values(batch) - calculated_traversal_ids( - batch.allow_cross_joins_across_databases(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/336433') - ) - end - - # Copied from lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb - def calculated_traversal_ids(batch) - <<~SQL - WITH RECURSIVE cte(source_id, namespace_id, parent_id, height) AS ( - ( - SELECT batch.id, batch.id, batch.parent_id, 1 - FROM (#{batch.to_sql}) AS batch - ) - UNION ALL - ( - SELECT cte.source_id, n.id, n.parent_id, cte.height+1 - FROM namespaces n, cte - WHERE n.id = cte.parent_id - ) - ) - SELECT flat_hierarchy.source_id as namespace_id, - array_agg(flat_hierarchy.namespace_id ORDER BY flat_hierarchy.height DESC) as traversal_ids - FROM (SELECT * FROM cte FOR UPDATE) flat_hierarchy - GROUP BY flat_hierarchy.source_id - SQL - end - - def mark_job_as_succeeded(*arguments) - Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillCiNamespaceMirrors', arguments) - end - end - end -end diff --git a/lib/gitlab/background_migration/backfill_ci_project_mirrors.rb b/lib/gitlab/background_migration/backfill_ci_project_mirrors.rb deleted file mode 100644 index ff6ab9928b0..00000000000 --- a/lib/gitlab/background_migration/backfill_ci_project_mirrors.rb +++ /dev/null @@ -1,52 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # A job to create ci_project_mirrors entries in batches - class BackfillCiProjectMirrors - class Project < ActiveRecord::Base # rubocop:disable Style/Documentation - include ::EachBatch - - self.table_name = 'projects' - - scope :base_query, -> do - select(:id, :namespace_id) - end - end - - PAUSE_SECONDS = 0.1 - SUB_BATCH_SIZE = 500 - - def perform(start_id, end_id) - batch_query = Project.base_query.where(id: start_id..end_id) - batch_query.each_batch(of: SUB_BATCH_SIZE) do |sub_batch| - first, last = sub_batch.pluck(Arel.sql('MIN(id), MAX(id)')).first - ranged_query = Project.unscoped.base_query.where(id: first..last) - - update_sql = <<~SQL - INSERT INTO ci_project_mirrors (project_id, namespace_id) - #{insert_values(ranged_query)} - ON CONFLICT (project_id) DO NOTHING - SQL - # We do nothing on conflict because we consider they were already filled. - - Project.connection.execute(update_sql) - - sleep PAUSE_SECONDS - end - - mark_job_as_succeeded(start_id, end_id) - end - - private - - def insert_values(batch) - batch.allow_cross_joins_across_databases(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/336433').to_sql - end - - def mark_job_as_succeeded(*arguments) - Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillCiProjectMirrors', arguments) - end - end - end -end diff --git a/lib/gitlab/background_migration/backfill_ci_runner_semver.rb b/lib/gitlab/background_migration/backfill_ci_runner_semver.rb deleted file mode 100644 index 0901649f789..00000000000 --- a/lib/gitlab/background_migration/backfill_ci_runner_semver.rb +++ /dev/null @@ -1,31 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # A job to update semver column in ci_runners in batches based on existing version values - class BackfillCiRunnerSemver < Gitlab::BackgroundMigration::BatchedMigrationJob - def perform - each_sub_batch( - operation_name: :backfill_ci_runner_semver, - batching_scope: ->(relation) { relation.where('semver::cidr IS NULL') } - ) do |sub_batch| - ranged_query = sub_batch.select( - %q(id AS r_id, - substring(ci_runners.version FROM 'v?(\d+\.\d+\.\d+)') AS extracted_semver) - ) - - update_sql = <<~SQL - UPDATE - ci_runners - SET semver = extracted_semver - FROM (#{ranged_query.to_sql}) v - WHERE id = v.r_id - AND v.extracted_semver IS NOT NULL - SQL - - connection.execute(update_sql) - end - end - end - end -end diff --git a/lib/gitlab/background_migration/backfill_group_features.rb b/lib/gitlab/background_migration/backfill_group_features.rb index 4c3af7be319..35b5282360f 100644 --- a/lib/gitlab/background_migration/backfill_group_features.rb +++ b/lib/gitlab/background_migration/backfill_group_features.rb @@ -4,19 +4,21 @@ module Gitlab module BackgroundMigration # Backfill group_features for an array of groups class BackfillGroupFeatures < ::Gitlab::BackgroundMigration::BatchedMigrationJob - def perform(batch_size) + job_arguments :batch_size + + def perform each_sub_batch( operation_name: :upsert_group_features, batching_arguments: { order_hint: :type }, batching_scope: ->(relation) { relation.where(type: 'Group') } ) do |sub_batch| - upsert_group_features(sub_batch, batch_size) + upsert_group_features(sub_batch) end end private - def upsert_group_features(relation, batch_size) + def upsert_group_features(relation) connection.execute( <<~SQL INSERT INTO group_features (group_id, created_at, updated_at) diff --git a/lib/gitlab/background_migration/backfill_integrations_type_new.rb b/lib/gitlab/background_migration/backfill_integrations_type_new.rb index 6f33472af7d..b07d9371c19 100644 --- a/lib/gitlab/background_migration/backfill_integrations_type_new.rb +++ b/lib/gitlab/background_migration/backfill_integrations_type_new.rb @@ -27,7 +27,7 @@ module Gitlab def process_sub_batch(sub_batch) # Extract the start/stop IDs from the current sub-batch - sub_start_id, sub_stop_id = sub_batch.pluck(Arel.sql('MIN(id), MAX(id)')).first + sub_start_id, sub_stop_id = sub_batch.pick(Arel.sql('MIN(id), MAX(id)')) # This matches the mapping from the INSERT trigger added in # db/migrate/20210721135638_add_triggers_to_integrations_type_new.rb diff --git a/lib/gitlab/background_migration/backfill_namespace_id_of_vulnerability_reads.rb b/lib/gitlab/background_migration/backfill_namespace_id_of_vulnerability_reads.rb new file mode 100644 index 00000000000..cd349bf3ae1 --- /dev/null +++ b/lib/gitlab/background_migration/backfill_namespace_id_of_vulnerability_reads.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Sets the `namespace_id` of the existing `vulnerability_reads` records + class BackfillNamespaceIdOfVulnerabilityReads < BatchedMigrationJob + UPDATE_SQL = <<~SQL + UPDATE + vulnerability_reads + SET + namespace_id = sub_query.namespace_id + FROM + (%<subquery>s) as sub_query + WHERE + vulnerability_reads.vulnerability_id = sub_query.vulnerability_id + SQL + + def perform + each_sub_batch(operation_name: :set_namespace_id) do |sub_batch| + update_query = update_query_for(sub_batch) + + connection.execute(update_query) + end + end + + private + + def update_query_for(sub_batch) + subquery = sub_batch.select("vulnerability_reads.vulnerability_id, projects.namespace_id") + .joins("INNER JOIN projects ON projects.id = vulnerability_reads.project_id") + + format(UPDATE_SQL, subquery: subquery.to_sql) + end + end + end +end diff --git a/lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb b/lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb index 587de1bcb5a..3b8a452b855 100644 --- a/lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb +++ b/lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb @@ -19,7 +19,7 @@ module Gitlab def perform(start_id, end_id, sub_batch_size) batch_query = Namespace.base_query.where(id: start_id..end_id) batch_query.each_batch(of: sub_batch_size) do |sub_batch| - first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first + first, last = sub_batch.pick(Arel.sql('min(id), max(id)')) ranged_query = Namespace.unscoped.base_query.where(id: first..last) update_sql = <<~SQL diff --git a/lib/gitlab/background_migration/backfill_namespace_traversal_ids_roots.rb b/lib/gitlab/background_migration/backfill_namespace_traversal_ids_roots.rb index 1c0a83285a6..c69289fb91f 100644 --- a/lib/gitlab/background_migration/backfill_namespace_traversal_ids_roots.rb +++ b/lib/gitlab/background_migration/backfill_namespace_traversal_ids_roots.rb @@ -22,7 +22,7 @@ module Gitlab .where("traversal_ids = '{}'") ranged_query.each_batch(of: sub_batch_size) do |sub_batch| - first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first + first, last = sub_batch.pick(Arel.sql('min(id), max(id)')) # The query need to be reconstructed because .each_batch modifies the default scope # See: https://gitlab.com/gitlab-org/gitlab/-/issues/330510 diff --git a/lib/gitlab/background_migration/backfill_project_import_level.rb b/lib/gitlab/background_migration/backfill_project_import_level.rb new file mode 100644 index 00000000000..06706b729ea --- /dev/null +++ b/lib/gitlab/background_migration/backfill_project_import_level.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true +# rubocop:disable Style/Documentation +module Gitlab + module BackgroundMigration + class BackfillProjectImportLevel < BatchedMigrationJob + LEVEL = { + Gitlab::Access::NO_ACCESS => [0], + Gitlab::Access::DEVELOPER => [2], + Gitlab::Access::MAINTAINER => [1], + Gitlab::Access::OWNER => [nil] + }.freeze + + def perform + each_sub_batch(operation_name: :update_import_level) do |sub_batch| + update_import_level(sub_batch) + end + end + + private + + def update_import_level(relation) + LEVEL.each do |import_level, creation_level| + namespace_ids = relation + .where(type: 'Group', project_creation_level: creation_level) + + NamespaceSetting.where( + namespace_id: namespace_ids + ).update_all(project_import_level: import_level) + end + end + end + end +end + +# rubocop:enable Style/Documentation diff --git a/lib/gitlab/background_migration/backfill_vulnerability_reads_cluster_agent.rb b/lib/gitlab/background_migration/backfill_vulnerability_reads_cluster_agent.rb new file mode 100644 index 00000000000..728b60f7a0e --- /dev/null +++ b/lib/gitlab/background_migration/backfill_vulnerability_reads_cluster_agent.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Backfills the `vulnerability_reads.casted_cluster_agent_id` column + class BackfillVulnerabilityReadsClusterAgent < Gitlab::BackgroundMigration::BatchedMigrationJob + CLUSTER_AGENTS_JOIN = <<~SQL + INNER JOIN cluster_agents + ON CAST(vulnerability_reads.cluster_agent_id AS bigint) = cluster_agents.id AND + vulnerability_reads.project_id = cluster_agents.project_id + SQL + + RELATION = ->(relation) do + relation + .where(report_type: 7) + end + + def perform + each_sub_batch( + operation_name: :update_all, + batching_scope: RELATION + ) do |sub_batch| + sub_batch + .joins(CLUSTER_AGENTS_JOIN) + .update_all('casted_cluster_agent_id = CAST(vulnerability_reads.cluster_agent_id AS bigint)') + end + end + end + end +end diff --git a/lib/gitlab/background_migration/backfill_work_item_type_id_for_issues.rb b/lib/gitlab/background_migration/backfill_work_item_type_id_for_issues.rb index a16efa4222b..32962f2bb89 100644 --- a/lib/gitlab/background_migration/backfill_work_item_type_id_for_issues.rb +++ b/lib/gitlab/background_migration/backfill_work_item_type_id_for_issues.rb @@ -20,7 +20,7 @@ module Gitlab parent_batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id, base_type) parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch| - first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first + first, last = sub_batch.pick(Arel.sql('min(id), max(id)')) # The query need to be reconstructed because .each_batch modifies the default scope # See: https://gitlab.com/gitlab-org/gitlab/-/issues/330510 diff --git a/lib/gitlab/background_migration/batched_migration_job.rb b/lib/gitlab/background_migration/batched_migration_job.rb index c47b1735ccf..11d15804344 100644 --- a/lib/gitlab/background_migration/batched_migration_job.rb +++ b/lib/gitlab/background_migration/batched_migration_job.rb @@ -3,22 +3,62 @@ module Gitlab module BackgroundMigration # Base class for batched background migrations. Subclasses should implement the `#perform` - # method as the entry point for the job's execution, which will be called with the migration - # arguments (if any). + # method as the entry point for the job's execution. + # + # Job arguments needed must be defined explicitly, + # see https://docs.gitlab.com/ee/development/database/batched_background_migrations.html#job-arguments. class BatchedMigrationJob include Gitlab::Database::DynamicModelHelpers - def initialize(start_id:, end_id:, batch_table:, batch_column:, sub_batch_size:, pause_ms:, connection:) + def initialize( + start_id:, end_id:, batch_table:, batch_column:, sub_batch_size:, pause_ms:, job_arguments: [], connection: + ) + @start_id = start_id @end_id = end_id @batch_table = batch_table @batch_column = batch_column @sub_batch_size = sub_batch_size @pause_ms = pause_ms + @job_arguments = job_arguments @connection = connection end - def perform(*job_arguments) + def self.generic_instance(batch_table:, batch_column:, job_arguments: [], connection:) + new( + batch_table: batch_table, batch_column: batch_column, + job_arguments: job_arguments, connection: connection, + start_id: 0, end_id: 0, sub_batch_size: 0, pause_ms: 0 + ) + end + + def self.job_arguments_count + 0 + end + + def self.job_arguments(*args) + args.each.with_index do |arg, index| + define_method(arg) do + @job_arguments[index] + end + end + + define_singleton_method(:job_arguments_count) do + args.count + end + end + + def self.scope_to(scope) + define_method(:filter_batch) do |relation| + instance_exec(relation, &scope) + end + end + + def filter_batch(relation) + relation + end + + def perform raise NotImplementedError, "subclasses of #{self.class.name} must implement #{__method__}" end @@ -33,9 +73,10 @@ module Gitlab def each_sub_batch(operation_name: :default, batching_arguments: {}, batching_scope: nil) all_batching_arguments = { column: batch_column, of: sub_batch_size }.merge(batching_arguments) - parent_relation = parent_batch_relation(batching_scope) + relation = filter_batch(base_relation) + sub_batch_relation = filter_sub_batch(relation, batching_scope) - parent_relation.each_batch(**all_batching_arguments) do |relation| + sub_batch_relation.each_batch(**all_batching_arguments) do |relation| batch_metrics.instrument_operation(operation_name) do yield relation end @@ -45,9 +86,13 @@ module Gitlab end def distinct_each_batch(operation_name: :default, batching_arguments: {}) + if base_relation != filter_batch(base_relation) + raise 'distinct_each_batch can not be used when additional filters are defined with scope_to' + end + all_batching_arguments = { column: batch_column, of: sub_batch_size }.merge(batching_arguments) - parent_batch_relation.distinct_each_batch(**all_batching_arguments) do |relation| + base_relation.distinct_each_batch(**all_batching_arguments) do |relation| batch_metrics.instrument_operation(operation_name) do yield relation end @@ -56,13 +101,15 @@ module Gitlab end end - def parent_batch_relation(batching_scope = nil) - parent_relation = define_batchable_model(batch_table, connection: connection) + def base_relation + define_batchable_model(batch_table, connection: connection) .where(batch_column => start_id..end_id) + end - return parent_relation unless batching_scope + def filter_sub_batch(relation, batching_scope = nil) + return relation unless batching_scope - batching_scope.call(parent_relation) + batching_scope.call(relation) end end end diff --git a/lib/gitlab/background_migration/batching_strategies/backfill_project_namespace_per_group_batching_strategy.rb b/lib/gitlab/background_migration/batching_strategies/backfill_project_namespace_per_group_batching_strategy.rb index 68be42dc0a0..12fd9ae7161 100644 --- a/lib/gitlab/background_migration/batching_strategies/backfill_project_namespace_per_group_batching_strategy.rb +++ b/lib/gitlab/background_migration/batching_strategies/backfill_project_namespace_per_group_batching_strategy.rb @@ -25,7 +25,7 @@ module Gitlab relation = model_class.where(projects_table[:namespace_id].in(hierarchy_cte_sql)).where("#{quoted_column_name} >= ?", batch_min_value) relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop - next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first + next_batch_bounds = batch.pick(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")) break end diff --git a/lib/gitlab/background_migration/batching_strategies/backfill_vulnerability_reads_cluster_agent_batching_strategy.rb b/lib/gitlab/background_migration/batching_strategies/backfill_vulnerability_reads_cluster_agent_batching_strategy.rb new file mode 100644 index 00000000000..f0d015198dc --- /dev/null +++ b/lib/gitlab/background_migration/batching_strategies/backfill_vulnerability_reads_cluster_agent_batching_strategy.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + module BatchingStrategies + # Batching class to use for back-filling vulnerability_read's casted_cluster_agent_id from cluster_agent_id. + # Batches will be scoped to records where the report_type belongs to cluster_image_scanning. + # + # If no more batches exist in the table, returns nil. + class BackfillVulnerabilityReadsClusterAgentBatchingStrategy < PrimaryKeyBatchingStrategy + CLUSTER_IMAGE_SCANNING_REPORT_TYPE = 7 + + def apply_additional_filters(relation, job_arguments: [], job_class: nil) + relation.where(report_type: CLUSTER_IMAGE_SCANNING_REPORT_TYPE) + end + end + end + end +end diff --git a/lib/gitlab/background_migration/batching_strategies/loose_index_scan_batching_strategy.rb b/lib/gitlab/background_migration/batching_strategies/loose_index_scan_batching_strategy.rb index 5cad9d2e3c4..fc08d2b0ab6 100644 --- a/lib/gitlab/background_migration/batching_strategies/loose_index_scan_batching_strategy.rb +++ b/lib/gitlab/background_migration/batching_strategies/loose_index_scan_batching_strategy.rb @@ -24,7 +24,7 @@ module Gitlab next_batch_bounds = nil relation.distinct_each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop - next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first + next_batch_bounds = batch.pick(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")) break end diff --git a/lib/gitlab/background_migration/batching_strategies/primary_key_batching_strategy.rb b/lib/gitlab/background_migration/batching_strategies/primary_key_batching_strategy.rb index c2f59bf9c76..1ffa4a052e5 100644 --- a/lib/gitlab/background_migration/batching_strategies/primary_key_batching_strategy.rb +++ b/lib/gitlab/background_migration/batching_strategies/primary_key_batching_strategy.rb @@ -24,11 +24,19 @@ module Gitlab quoted_column_name = model_class.connection.quote_column_name(column_name) relation = model_class.where("#{quoted_column_name} >= ?", batch_min_value) + + if job_class + relation = filter_batch(relation, + table_name: table_name, column_name: column_name, + job_class: job_class, job_arguments: job_arguments + ) + end + relation = apply_additional_filters(relation, job_arguments: job_arguments, job_class: job_class) next_batch_bounds = nil relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop - next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first + next_batch_bounds = batch.pick(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")) break end @@ -36,13 +44,27 @@ module Gitlab next_batch_bounds end + # Deprecated + # + # Use `scope_to` to define additional filters on the migration job class. + # + # see https://docs.gitlab.com/ee/development/database/batched_background_migrations.html#adding-additional-filters. def apply_additional_filters(relation, job_arguments: [], job_class: nil) - if job_class.respond_to?(:batching_scope) - return job_class.batching_scope(relation, job_arguments: job_arguments) - end - relation end + + private + + def filter_batch(relation, table_name:, column_name:, job_class:, job_arguments: []) + return relation unless job_class.respond_to?(:generic_instance) + + job = job_class.generic_instance( + batch_table: table_name, batch_column: column_name, + job_arguments: job_arguments, connection: connection + ) + + job.filter_batch(relation) + end end end end diff --git a/lib/gitlab/background_migration/cleanup_orphaned_lfs_objects_projects.rb b/lib/gitlab/background_migration/cleanup_orphaned_lfs_objects_projects.rb index cb9b0e88ef4..4da120769a0 100644 --- a/lib/gitlab/background_migration/cleanup_orphaned_lfs_objects_projects.rb +++ b/lib/gitlab/background_migration/cleanup_orphaned_lfs_objects_projects.rb @@ -62,7 +62,7 @@ module Gitlab batch = LfsObjectsProject.where(id: start_id..end_id) batch.each_batch(of: SUB_BATCH_SIZE) do |sub_batch| - first, last = sub_batch.pluck(Arel.sql('min(lfs_objects_projects.id), max(lfs_objects_projects.id)')).first + first, last = sub_batch.pick(Arel.sql('min(lfs_objects_projects.id), max(lfs_objects_projects.id)')) lfs_objects_without_association = LfsObjectsProject diff --git a/lib/gitlab/background_migration/copy_ci_builds_columns_to_security_scans.rb b/lib/gitlab/background_migration/copy_ci_builds_columns_to_security_scans.rb deleted file mode 100644 index 107ac9b0c3b..00000000000 --- a/lib/gitlab/background_migration/copy_ci_builds_columns_to_security_scans.rb +++ /dev/null @@ -1,44 +0,0 @@ -# frozen_string_literal: true -# rubocop:disable Style/Documentation - -module Gitlab - module BackgroundMigration - class CopyCiBuildsColumnsToSecurityScans - extend ::Gitlab::Utils::Override - - UPDATE_BATCH_SIZE = 500 - - def perform(start_id, stop_id) - (start_id..stop_id).step(UPDATE_BATCH_SIZE).each do |offset| - batch_start = offset - batch_stop = offset + UPDATE_BATCH_SIZE - 1 - - ActiveRecord::Base.connection.execute <<~SQL - UPDATE - security_scans - SET - project_id = ci_builds.project_id, - pipeline_id = ci_builds.commit_id - FROM ci_builds - WHERE ci_builds.type='Ci::Build' - AND ci_builds.id=security_scans.build_id - AND security_scans.id BETWEEN #{Integer(batch_start)} AND #{Integer(batch_stop)} - SQL - end - - mark_job_as_succeeded(start_id, stop_id) - rescue StandardError => error - Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error) - end - - private - - def mark_job_as_succeeded(*arguments) - Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded( - 'CopyCiBuildsColumnsToSecurityScans', - arguments - ) - end - end - end -end diff --git a/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb b/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb index 826845935b8..15e54431a44 100644 --- a/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb +++ b/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb @@ -14,7 +14,9 @@ module Gitlab # - The table that is migrated does _not_ need `id` as the primary key # We use the provided primary_key column to perform the update. class CopyColumnUsingBackgroundMigrationJob < BatchedMigrationJob - def perform(copy_from, copy_to) + job_arguments :copy_from, :copy_to + + def perform assignment_clauses = build_assignment_clauses(copy_from, copy_to) each_sub_batch(operation_name: :update_all) do |relation| diff --git a/lib/gitlab/background_migration/disable_legacy_open_source_license_for_no_issues_no_repo_projects.rb b/lib/gitlab/background_migration/disable_legacy_open_source_license_for_no_issues_no_repo_projects.rb new file mode 100644 index 00000000000..019c3d15b3e --- /dev/null +++ b/lib/gitlab/background_migration/disable_legacy_open_source_license_for_no_issues_no_repo_projects.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Set `project_settings.legacy_open_source_license_available` to false for public projects with no issues & no repo + class DisableLegacyOpenSourceLicenseForNoIssuesNoRepoProjects < ::Gitlab::BackgroundMigration::BatchedMigrationJob + PUBLIC = 20 + + # Migration only version of `project_settings` table + class ProjectSetting < ApplicationRecord + self.table_name = 'project_settings' + end + + def perform + each_sub_batch( + operation_name: :disable_legacy_open_source_license_for_no_issues_no_repo_projects, + batching_scope: ->(relation) { relation.where(visibility_level: PUBLIC) } + ) do |sub_batch| + no_issues_no_repo_projects = + sub_batch + .joins('LEFT OUTER JOIN project_statistics ON project_statistics.project_id = projects.id') + .joins('LEFT OUTER JOIN project_settings ON project_settings.project_id = projects.id') + .joins('LEFT OUTER JOIN issues ON issues.project_id = projects.id') + .where('project_statistics.repository_size' => 0, + 'project_settings.legacy_open_source_license_available' => true) + .group('projects.id') + .having('COUNT(issues.id) = 0') + + ProjectSetting + .where(project_id: no_issues_no_repo_projects) + .update_all(legacy_open_source_license_available: false) + end + end + end + end +end diff --git a/lib/gitlab/background_migration/disable_legacy_open_source_license_for_one_member_no_repo_projects.rb b/lib/gitlab/background_migration/disable_legacy_open_source_license_for_one_member_no_repo_projects.rb new file mode 100644 index 00000000000..3a9049b1f19 --- /dev/null +++ b/lib/gitlab/background_migration/disable_legacy_open_source_license_for_one_member_no_repo_projects.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Set `project_settings.legacy_open_source_license_available` to false for public projects with 1 member and no repo + class DisableLegacyOpenSourceLicenseForOneMemberNoRepoProjects < ::Gitlab::BackgroundMigration::BatchedMigrationJob + PUBLIC = 20 + + # Migration only version of `project_settings` table + class ProjectSetting < ApplicationRecord + self.table_name = 'project_settings' + end + + def perform + each_sub_batch( + operation_name: :disable_legacy_open_source_license_for_one_member_no_repo_projects, + batching_scope: ->(relation) { relation.where(visibility_level: PUBLIC) } + ) do |sub_batch| + one_member_no_repo_projects = + sub_batch + .joins('LEFT OUTER JOIN project_statistics ON project_statistics.project_id = projects.id') + .joins('LEFT OUTER JOIN project_settings ON project_settings.project_id = projects.id') + .joins('LEFT OUTER JOIN project_authorizations ON project_authorizations.project_id = projects.id') + .where('project_statistics.repository_size' => 0, + 'project_settings.legacy_open_source_license_available' => true) + .group('projects.id') + .having('COUNT(project_authorizations.user_id) = 1') + + ProjectSetting + .where(project_id: one_member_no_repo_projects) + .update_all(legacy_open_source_license_available: false) + end + end + end + end +end diff --git a/lib/gitlab/background_migration/drop_invalid_security_findings.rb b/lib/gitlab/background_migration/drop_invalid_security_findings.rb index 87551bb1b1e..000628e109c 100644 --- a/lib/gitlab/background_migration/drop_invalid_security_findings.rb +++ b/lib/gitlab/background_migration/drop_invalid_security_findings.rb @@ -19,7 +19,7 @@ module Gitlab .no_uuid ranged_query.each_batch(of: sub_batch_size) do |sub_batch| - first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first + first, last = sub_batch.pick(Arel.sql('min(id), max(id)')) # The query need to be reconstructed because .each_batch modifies the default scope # See: https://gitlab.com/gitlab-org/gitlab/-/issues/330510 diff --git a/lib/gitlab/background_migration/encrypt_static_object_token.rb b/lib/gitlab/background_migration/encrypt_static_object_token.rb index a087d2529eb..e1805d40bab 100644 --- a/lib/gitlab/background_migration/encrypt_static_object_token.rb +++ b/lib/gitlab/background_migration/encrypt_static_object_token.rb @@ -23,7 +23,7 @@ module Gitlab .without_static_object_token_encrypted ranged_query.each_batch(of: BATCH_SIZE) do |sub_batch| - first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first + first, last = sub_batch.pick(Arel.sql('min(id), max(id)')) batch_query = User.unscoped .where(id: first..last) diff --git a/lib/gitlab/background_migration/fix_first_mentioned_in_commit_at.rb b/lib/gitlab/background_migration/fix_first_mentioned_in_commit_at.rb index 8f785476aa0..6de2187b8e3 100644 --- a/lib/gitlab/background_migration/fix_first_mentioned_in_commit_at.rb +++ b/lib/gitlab/background_migration/fix_first_mentioned_in_commit_at.rb @@ -29,7 +29,7 @@ module Gitlab def perform(start_id, end_id) scope(start_id, end_id).each_batch(of: SUB_BATCH_SIZE, column: :issue_id) do |sub_batch| - first, last = sub_batch.pluck(Arel.sql('min(issue_id), max(issue_id)')).first + first, last = sub_batch.pick(Arel.sql('min(issue_id), max(issue_id)')) # The query need to be reconstructed because .each_batch modifies the default scope # See: https://gitlab.com/gitlab-org/gitlab/-/issues/330510 diff --git a/lib/gitlab/background_migration/fix_projects_without_prometheus_service.rb b/lib/gitlab/background_migration/fix_projects_without_prometheus_service.rb index 496ec0bd0a1..97a9913fa74 100644 --- a/lib/gitlab/background_migration/fix_projects_without_prometheus_service.rb +++ b/lib/gitlab/background_migration/fix_projects_without_prometheus_service.rb @@ -137,7 +137,7 @@ module Gitlab def create_sql(from_id, to_id) <<~SQL WITH created_records AS #{Gitlab::Database::AsWithMaterialized.materialized_if_supported} ( - INSERT INTO services (project_id, #{DEFAULTS.keys.map { |key| %("#{key}")}.join(',')}, created_at, updated_at) + INSERT INTO services (project_id, #{DEFAULTS.keys.map { |key| %("#{key}") }.join(',')}, created_at, updated_at) #{select_insert_values_sql(from_id, to_id)} RETURNING * ) diff --git a/lib/gitlab/background_migration/migrate_shared_vulnerability_scanners.rb b/lib/gitlab/background_migration/migrate_shared_vulnerability_scanners.rb new file mode 100644 index 00000000000..bea0120f093 --- /dev/null +++ b/lib/gitlab/background_migration/migrate_shared_vulnerability_scanners.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # rubocop: disable Style/Documentation + class MigrateSharedVulnerabilityScanners < BatchedMigrationJob + def perform + end + end + # rubocop: enable Style/Documentation + end +end + +# rubocop: disable Layout/LineLength +Gitlab::BackgroundMigration::MigrateSharedVulnerabilityScanners.prepend_mod_with("Gitlab::BackgroundMigration::MigrateSharedVulnerabilityScanners") +# rubocop: enable Layout/LineLength diff --git a/lib/gitlab/background_migration/populate_status_column_of_security_scans.rb b/lib/gitlab/background_migration/populate_status_column_of_security_scans.rb deleted file mode 100644 index 9740bcaa86b..00000000000 --- a/lib/gitlab/background_migration/populate_status_column_of_security_scans.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - class PopulateStatusColumnOfSecurityScans # rubocop:disable Style/Documentation - def perform(_start_id, _end_id) - # no-op - end - end - end -end - -Gitlab::BackgroundMigration::PopulateStatusColumnOfSecurityScans.prepend_mod diff --git a/lib/gitlab/background_migration/populate_vulnerability_reads.rb b/lib/gitlab/background_migration/populate_vulnerability_reads.rb index 5e6475a3d1a..656c62d9ee5 100644 --- a/lib/gitlab/background_migration/populate_vulnerability_reads.rb +++ b/lib/gitlab/background_migration/populate_vulnerability_reads.rb @@ -10,7 +10,7 @@ module Gitlab def perform(start_id, end_id, sub_batch_size) vulnerability_model.where(id: start_id..end_id).each_batch(of: sub_batch_size) do |sub_batch| - first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first + first, last = sub_batch.pick(Arel.sql('min(id), max(id)')) connection.execute(insert_query(first, last)) sleep PAUSE_SECONDS diff --git a/lib/gitlab/background_migration/project_namespaces/backfill_project_namespaces.rb b/lib/gitlab/background_migration/project_namespaces/backfill_project_namespaces.rb index 2b27bad3497..845a3c16bbe 100644 --- a/lib/gitlab/background_migration/project_namespaces/backfill_project_namespaces.rb +++ b/lib/gitlab/background_migration/project_namespaces/backfill_project_namespaces.rb @@ -105,9 +105,11 @@ module Gitlab .joins("INNER JOIN namespaces n2 ON namespaces.parent_id = n2.id") .select("namespaces.id as project_namespace_id, n2.traversal_ids") + # some customers have namespaces.id column type as bigint, which makes array_append(integer[], bigint) to fail + # so we just explicitly cast arguments to compatible types ApplicationRecord.connection.execute <<~SQL UPDATE namespaces - SET traversal_ids = array_append(project_namespaces.traversal_ids, project_namespaces.project_namespace_id) + SET traversal_ids = array_append(project_namespaces.traversal_ids::bigint[], project_namespaces.project_namespace_id::bigint) FROM (#{namespaces.to_sql}) as project_namespaces(project_namespace_id, traversal_ids) WHERE id = project_namespaces.project_namespace_id SQL diff --git a/lib/gitlab/background_migration/update_jira_tracker_data_deployment_type_based_on_url.rb b/lib/gitlab/background_migration/update_jira_tracker_data_deployment_type_based_on_url.rb index bba1ca26b35..e9a38916999 100644 --- a/lib/gitlab/background_migration/update_jira_tracker_data_deployment_type_based_on_url.rb +++ b/lib/gitlab/background_migration/update_jira_tracker_data_deployment_type_based_on_url.rb @@ -1,42 +1,74 @@ # frozen_string_literal: true # rubocop: disable Style/Documentation -class Gitlab::BackgroundMigration::UpdateJiraTrackerDataDeploymentTypeBasedOnUrl - # rubocop: disable Gitlab/NamespacedClass - class JiraTrackerData < ActiveRecord::Base - self.table_name = "jira_tracker_data" - self.inheritance_column = :_type_disabled +module Gitlab + module BackgroundMigration + class UpdateJiraTrackerDataDeploymentTypeBasedOnUrl < Gitlab::BackgroundMigration::BatchedMigrationJob + # rubocop: disable Gitlab/NamespacedClass + class JiraTrackerData < ActiveRecord::Base + self.table_name = "jira_tracker_data" + self.inheritance_column = :_type_disabled - include ::Integrations::BaseDataFields - attr_encrypted :url, encryption_options - attr_encrypted :api_url, encryption_options + include ::Integrations::BaseDataFields + attr_encrypted :url, encryption_options + attr_encrypted :api_url, encryption_options - enum deployment_type: { unknown: 0, server: 1, cloud: 2 }, _prefix: :deployment - end - # rubocop: enable Gitlab/NamespacedClass + enum deployment_type: { unknown: 0, server: 1, cloud: 2 }, _prefix: :deployment + end + # rubocop: enable Gitlab/NamespacedClass - # https://rubular.com/r/uwgK7k9KH23efa - JIRA_CLOUD_REGEX = %r{^https?://[A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?\.atlassian\.net$}ix.freeze + # https://rubular.com/r/uwgK7k9KH23efa + JIRA_CLOUD_REGEX = %r{^https?://[A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?\.atlassian\.net$}ix.freeze - # rubocop: disable CodeReuse/ActiveRecord - def perform(start_id, end_id) - trackers_data = JiraTrackerData - .where(deployment_type: 'unknown') - .where(id: start_id..end_id) + def perform + cloud = [] + server = [] + unknown = [] - cloud, server = trackers_data.partition { |tracker_data| tracker_data.url.match?(JIRA_CLOUD_REGEX) } + trackers_data.each do |tracker_data| + client_url = tracker_data.api_url.presence || tracker_data.url - cloud_mappings = cloud.each_with_object({}) do |tracker_data, hash| - hash[tracker_data] = { deployment_type: 2 } - end + if client_url.blank? + unknown << tracker_data + elsif client_url.match?(JIRA_CLOUD_REGEX) + cloud << tracker_data + else + server << tracker_data + end + end - server_mapppings = server.each_with_object({}) do |tracker_data, hash| - hash[tracker_data] = { deployment_type: 1 } - end + cloud_mappings = cloud.each_with_object({}) do |tracker_data, hash| + hash[tracker_data] = { deployment_type: 2 } + end + + server_mappings = server.each_with_object({}) do |tracker_data, hash| + hash[tracker_data] = { deployment_type: 1 } + end + + unknown_mappings = unknown.each_with_object({}) do |tracker_data, hash| + hash[tracker_data] = { deployment_type: 0 } + end - mappings = cloud_mappings.merge(server_mapppings) + mappings = cloud_mappings.merge(server_mappings, unknown_mappings) - ::Gitlab::Database::BulkUpdate.execute(%i[deployment_type], mappings) + update_records(mappings) + end + + private + + def update_records(mappings) + return if mappings.empty? + + ::Gitlab::Database::BulkUpdate.execute(%i[deployment_type], mappings) + end + + # rubocop: disable CodeReuse/ActiveRecord + def trackers_data + @trackers_data ||= JiraTrackerData + .where(deployment_type: 'unknown') + .where(batch_column => start_id..end_id) + end + # rubocop: enable CodeReuse/ActiveRecord + end end - # rubocop: enable CodeReuse/ActiveRecord end diff --git a/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb b/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb index 38932e52bb0..b61f2ee7f4c 100644 --- a/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb +++ b/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb @@ -12,7 +12,7 @@ module Gitlab define_batchable_model('timelogs', connection: connection) .where(spent_at: nil, id: start_id..stop_id) .each_batch(of: 100) do |subbatch| - batch_start, batch_end = subbatch.pluck('min(id), max(id)').first + batch_start, batch_end = subbatch.pick('min(id), max(id)') update_timelogs(batch_start, batch_end) end |