summaryrefslogtreecommitdiff
path: root/lib/gitlab/background_migration
diff options
context:
space:
mode:
author: GitLab Bot <gitlab-bot@gitlab.com>, 2022-08-18 08:17:02 +0000
committer: GitLab Bot <gitlab-bot@gitlab.com>, 2022-08-18 08:17:02 +0000
commit: b39512ed755239198a9c294b6a45e65c05900235 (patch)
tree: d234a3efade1de67c46b9e5a38ce813627726aa7 /lib/gitlab/background_migration
parent: d31474cf3b17ece37939d20082b07f6657cc79a9 (diff)
download: gitlab-ce-b39512ed755239198a9c294b6a45e65c05900235.tar.gz
Add latest changes from gitlab-org/gitlab@15-3-stable-ee (tag: v15.3.0-rc42)
Diffstat (limited to 'lib/gitlab/background_migration')
-rw-r--r-- lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb | 77
-rw-r--r-- lib/gitlab/background_migration/backfill_ci_project_mirrors.rb | 52
-rw-r--r-- lib/gitlab/background_migration/backfill_ci_runner_semver.rb | 31
-rw-r--r-- lib/gitlab/background_migration/backfill_group_features.rb | 8
-rw-r--r-- lib/gitlab/background_migration/backfill_integrations_type_new.rb | 2
-rw-r--r-- lib/gitlab/background_migration/backfill_namespace_id_of_vulnerability_reads.rb | 36
-rw-r--r-- lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb | 2
-rw-r--r-- lib/gitlab/background_migration/backfill_namespace_traversal_ids_roots.rb | 2
-rw-r--r-- lib/gitlab/background_migration/backfill_project_import_level.rb | 35
-rw-r--r-- lib/gitlab/background_migration/backfill_vulnerability_reads_cluster_agent.rb | 30
-rw-r--r-- lib/gitlab/background_migration/backfill_work_item_type_id_for_issues.rb | 2
-rw-r--r-- lib/gitlab/background_migration/batched_migration_job.rb | 69
-rw-r--r-- lib/gitlab/background_migration/batching_strategies/backfill_project_namespace_per_group_batching_strategy.rb | 2
-rw-r--r-- lib/gitlab/background_migration/batching_strategies/backfill_vulnerability_reads_cluster_agent_batching_strategy.rb | 19
-rw-r--r-- lib/gitlab/background_migration/batching_strategies/loose_index_scan_batching_strategy.rb | 2
-rw-r--r-- lib/gitlab/background_migration/batching_strategies/primary_key_batching_strategy.rb | 32
-rw-r--r-- lib/gitlab/background_migration/cleanup_orphaned_lfs_objects_projects.rb | 2
-rw-r--r-- lib/gitlab/background_migration/copy_ci_builds_columns_to_security_scans.rb | 44
-rw-r--r-- lib/gitlab/background_migration/copy_column_using_background_migration_job.rb | 4
-rw-r--r-- lib/gitlab/background_migration/disable_legacy_open_source_license_for_no_issues_no_repo_projects.rb | 36
-rw-r--r-- lib/gitlab/background_migration/disable_legacy_open_source_license_for_one_member_no_repo_projects.rb | 36
-rw-r--r-- lib/gitlab/background_migration/drop_invalid_security_findings.rb | 2
-rw-r--r-- lib/gitlab/background_migration/encrypt_static_object_token.rb | 2
-rw-r--r-- lib/gitlab/background_migration/fix_first_mentioned_in_commit_at.rb | 2
-rw-r--r-- lib/gitlab/background_migration/fix_projects_without_prometheus_service.rb | 2
-rw-r--r-- lib/gitlab/background_migration/migrate_shared_vulnerability_scanners.rb | 16
-rw-r--r-- lib/gitlab/background_migration/populate_status_column_of_security_scans.rb | 13
-rw-r--r-- lib/gitlab/background_migration/populate_vulnerability_reads.rb | 2
-rw-r--r-- lib/gitlab/background_migration/project_namespaces/backfill_project_namespaces.rb | 4
-rw-r--r-- lib/gitlab/background_migration/update_jira_tracker_data_deployment_type_based_on_url.rb | 88
-rw-r--r-- lib/gitlab/background_migration/update_timelogs_null_spent_at.rb | 2
31 files changed, 377 insertions, 279 deletions
diff --git a/lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb b/lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb
deleted file mode 100644
index 2247747ba08..00000000000
--- a/lib/gitlab/background_migration/backfill_ci_namespace_mirrors.rb
+++ /dev/null
@@ -1,77 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module BackgroundMigration
- # A job to create ci_namespace_mirrors entries in batches
- class BackfillCiNamespaceMirrors
- class Namespace < ActiveRecord::Base # rubocop:disable Style/Documentation
- include ::EachBatch
-
- self.table_name = 'namespaces'
- self.inheritance_column = nil
-
- scope :base_query, -> do
- select(:id, :parent_id)
- end
- end
-
- PAUSE_SECONDS = 0.1
- SUB_BATCH_SIZE = 500
-
- def perform(start_id, end_id)
- batch_query = Namespace.base_query.where(id: start_id..end_id)
- batch_query.each_batch(of: SUB_BATCH_SIZE) do |sub_batch|
- first, last = sub_batch.pluck(Arel.sql('MIN(id), MAX(id)')).first
- ranged_query = Namespace.unscoped.base_query.where(id: first..last)
-
- update_sql = <<~SQL
- INSERT INTO ci_namespace_mirrors (namespace_id, traversal_ids)
- #{insert_values(ranged_query)}
- ON CONFLICT (namespace_id) DO NOTHING
- SQL
- # We do nothing on conflict because we consider they were already filled.
-
- Namespace.connection.execute(update_sql)
-
- sleep PAUSE_SECONDS
- end
-
- mark_job_as_succeeded(start_id, end_id)
- end
-
- private
-
- def insert_values(batch)
- calculated_traversal_ids(
- batch.allow_cross_joins_across_databases(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/336433')
- )
- end
-
- # Copied from lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb
- def calculated_traversal_ids(batch)
- <<~SQL
- WITH RECURSIVE cte(source_id, namespace_id, parent_id, height) AS (
- (
- SELECT batch.id, batch.id, batch.parent_id, 1
- FROM (#{batch.to_sql}) AS batch
- )
- UNION ALL
- (
- SELECT cte.source_id, n.id, n.parent_id, cte.height+1
- FROM namespaces n, cte
- WHERE n.id = cte.parent_id
- )
- )
- SELECT flat_hierarchy.source_id as namespace_id,
- array_agg(flat_hierarchy.namespace_id ORDER BY flat_hierarchy.height DESC) as traversal_ids
- FROM (SELECT * FROM cte FOR UPDATE) flat_hierarchy
- GROUP BY flat_hierarchy.source_id
- SQL
- end
-
- def mark_job_as_succeeded(*arguments)
- Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillCiNamespaceMirrors', arguments)
- end
- end
- end
-end
diff --git a/lib/gitlab/background_migration/backfill_ci_project_mirrors.rb b/lib/gitlab/background_migration/backfill_ci_project_mirrors.rb
deleted file mode 100644
index ff6ab9928b0..00000000000
--- a/lib/gitlab/background_migration/backfill_ci_project_mirrors.rb
+++ /dev/null
@@ -1,52 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module BackgroundMigration
- # A job to create ci_project_mirrors entries in batches
- class BackfillCiProjectMirrors
- class Project < ActiveRecord::Base # rubocop:disable Style/Documentation
- include ::EachBatch
-
- self.table_name = 'projects'
-
- scope :base_query, -> do
- select(:id, :namespace_id)
- end
- end
-
- PAUSE_SECONDS = 0.1
- SUB_BATCH_SIZE = 500
-
- def perform(start_id, end_id)
- batch_query = Project.base_query.where(id: start_id..end_id)
- batch_query.each_batch(of: SUB_BATCH_SIZE) do |sub_batch|
- first, last = sub_batch.pluck(Arel.sql('MIN(id), MAX(id)')).first
- ranged_query = Project.unscoped.base_query.where(id: first..last)
-
- update_sql = <<~SQL
- INSERT INTO ci_project_mirrors (project_id, namespace_id)
- #{insert_values(ranged_query)}
- ON CONFLICT (project_id) DO NOTHING
- SQL
- # We do nothing on conflict because we consider they were already filled.
-
- Project.connection.execute(update_sql)
-
- sleep PAUSE_SECONDS
- end
-
- mark_job_as_succeeded(start_id, end_id)
- end
-
- private
-
- def insert_values(batch)
- batch.allow_cross_joins_across_databases(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/336433').to_sql
- end
-
- def mark_job_as_succeeded(*arguments)
- Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillCiProjectMirrors', arguments)
- end
- end
- end
-end
diff --git a/lib/gitlab/background_migration/backfill_ci_runner_semver.rb b/lib/gitlab/background_migration/backfill_ci_runner_semver.rb
deleted file mode 100644
index 0901649f789..00000000000
--- a/lib/gitlab/background_migration/backfill_ci_runner_semver.rb
+++ /dev/null
@@ -1,31 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module BackgroundMigration
- # A job to update semver column in ci_runners in batches based on existing version values
- class BackfillCiRunnerSemver < Gitlab::BackgroundMigration::BatchedMigrationJob
- def perform
- each_sub_batch(
- operation_name: :backfill_ci_runner_semver,
- batching_scope: ->(relation) { relation.where('semver::cidr IS NULL') }
- ) do |sub_batch|
- ranged_query = sub_batch.select(
- %q(id AS r_id,
- substring(ci_runners.version FROM 'v?(\d+\.\d+\.\d+)') AS extracted_semver)
- )
-
- update_sql = <<~SQL
- UPDATE
- ci_runners
- SET semver = extracted_semver
- FROM (#{ranged_query.to_sql}) v
- WHERE id = v.r_id
- AND v.extracted_semver IS NOT NULL
- SQL
-
- connection.execute(update_sql)
- end
- end
- end
- end
-end
diff --git a/lib/gitlab/background_migration/backfill_group_features.rb b/lib/gitlab/background_migration/backfill_group_features.rb
index 4c3af7be319..35b5282360f 100644
--- a/lib/gitlab/background_migration/backfill_group_features.rb
+++ b/lib/gitlab/background_migration/backfill_group_features.rb
@@ -4,19 +4,21 @@ module Gitlab
module BackgroundMigration
# Backfill group_features for an array of groups
class BackfillGroupFeatures < ::Gitlab::BackgroundMigration::BatchedMigrationJob
- def perform(batch_size)
+ job_arguments :batch_size
+
+ def perform
each_sub_batch(
operation_name: :upsert_group_features,
batching_arguments: { order_hint: :type },
batching_scope: ->(relation) { relation.where(type: 'Group') }
) do |sub_batch|
- upsert_group_features(sub_batch, batch_size)
+ upsert_group_features(sub_batch)
end
end
private
- def upsert_group_features(relation, batch_size)
+ def upsert_group_features(relation)
connection.execute(
<<~SQL
INSERT INTO group_features (group_id, created_at, updated_at)
diff --git a/lib/gitlab/background_migration/backfill_integrations_type_new.rb b/lib/gitlab/background_migration/backfill_integrations_type_new.rb
index 6f33472af7d..b07d9371c19 100644
--- a/lib/gitlab/background_migration/backfill_integrations_type_new.rb
+++ b/lib/gitlab/background_migration/backfill_integrations_type_new.rb
@@ -27,7 +27,7 @@ module Gitlab
def process_sub_batch(sub_batch)
# Extract the start/stop IDs from the current sub-batch
- sub_start_id, sub_stop_id = sub_batch.pluck(Arel.sql('MIN(id), MAX(id)')).first
+ sub_start_id, sub_stop_id = sub_batch.pick(Arel.sql('MIN(id), MAX(id)'))
# This matches the mapping from the INSERT trigger added in
# db/migrate/20210721135638_add_triggers_to_integrations_type_new.rb
diff --git a/lib/gitlab/background_migration/backfill_namespace_id_of_vulnerability_reads.rb b/lib/gitlab/background_migration/backfill_namespace_id_of_vulnerability_reads.rb
new file mode 100644
index 00000000000..cd349bf3ae1
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_namespace_id_of_vulnerability_reads.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Sets the `namespace_id` of the existing `vulnerability_reads` records
+ class BackfillNamespaceIdOfVulnerabilityReads < BatchedMigrationJob
+ UPDATE_SQL = <<~SQL
+ UPDATE
+ vulnerability_reads
+ SET
+ namespace_id = sub_query.namespace_id
+ FROM
+ (%<subquery>s) as sub_query
+ WHERE
+ vulnerability_reads.vulnerability_id = sub_query.vulnerability_id
+ SQL
+
+ def perform
+ each_sub_batch(operation_name: :set_namespace_id) do |sub_batch|
+ update_query = update_query_for(sub_batch)
+
+ connection.execute(update_query)
+ end
+ end
+
+ private
+
+ def update_query_for(sub_batch)
+ subquery = sub_batch.select("vulnerability_reads.vulnerability_id, projects.namespace_id")
+ .joins("INNER JOIN projects ON projects.id = vulnerability_reads.project_id")
+
+ format(UPDATE_SQL, subquery: subquery.to_sql)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb b/lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb
index 587de1bcb5a..3b8a452b855 100644
--- a/lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb
+++ b/lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb
@@ -19,7 +19,7 @@ module Gitlab
def perform(start_id, end_id, sub_batch_size)
batch_query = Namespace.base_query.where(id: start_id..end_id)
batch_query.each_batch(of: sub_batch_size) do |sub_batch|
- first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first
+ first, last = sub_batch.pick(Arel.sql('min(id), max(id)'))
ranged_query = Namespace.unscoped.base_query.where(id: first..last)
update_sql = <<~SQL
diff --git a/lib/gitlab/background_migration/backfill_namespace_traversal_ids_roots.rb b/lib/gitlab/background_migration/backfill_namespace_traversal_ids_roots.rb
index 1c0a83285a6..c69289fb91f 100644
--- a/lib/gitlab/background_migration/backfill_namespace_traversal_ids_roots.rb
+++ b/lib/gitlab/background_migration/backfill_namespace_traversal_ids_roots.rb
@@ -22,7 +22,7 @@ module Gitlab
.where("traversal_ids = '{}'")
ranged_query.each_batch(of: sub_batch_size) do |sub_batch|
- first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first
+ first, last = sub_batch.pick(Arel.sql('min(id), max(id)'))
# The query need to be reconstructed because .each_batch modifies the default scope
# See: https://gitlab.com/gitlab-org/gitlab/-/issues/330510
diff --git a/lib/gitlab/background_migration/backfill_project_import_level.rb b/lib/gitlab/background_migration/backfill_project_import_level.rb
new file mode 100644
index 00000000000..06706b729ea
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_project_import_level.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+# rubocop:disable Style/Documentation
+module Gitlab
+ module BackgroundMigration
+ class BackfillProjectImportLevel < BatchedMigrationJob
+ LEVEL = {
+ Gitlab::Access::NO_ACCESS => [0],
+ Gitlab::Access::DEVELOPER => [2],
+ Gitlab::Access::MAINTAINER => [1],
+ Gitlab::Access::OWNER => [nil]
+ }.freeze
+
+ def perform
+ each_sub_batch(operation_name: :update_import_level) do |sub_batch|
+ update_import_level(sub_batch)
+ end
+ end
+
+ private
+
+ def update_import_level(relation)
+ LEVEL.each do |import_level, creation_level|
+ namespace_ids = relation
+ .where(type: 'Group', project_creation_level: creation_level)
+
+ NamespaceSetting.where(
+ namespace_id: namespace_ids
+ ).update_all(project_import_level: import_level)
+ end
+ end
+ end
+ end
+end
+
+# rubocop:enable Style/Documentation
diff --git a/lib/gitlab/background_migration/backfill_vulnerability_reads_cluster_agent.rb b/lib/gitlab/background_migration/backfill_vulnerability_reads_cluster_agent.rb
new file mode 100644
index 00000000000..728b60f7a0e
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_vulnerability_reads_cluster_agent.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Backfills the `vulnerability_reads.casted_cluster_agent_id` column
+ class BackfillVulnerabilityReadsClusterAgent < Gitlab::BackgroundMigration::BatchedMigrationJob
+ CLUSTER_AGENTS_JOIN = <<~SQL
+ INNER JOIN cluster_agents
+ ON CAST(vulnerability_reads.cluster_agent_id AS bigint) = cluster_agents.id AND
+ vulnerability_reads.project_id = cluster_agents.project_id
+ SQL
+
+ RELATION = ->(relation) do
+ relation
+ .where(report_type: 7)
+ end
+
+ def perform
+ each_sub_batch(
+ operation_name: :update_all,
+ batching_scope: RELATION
+ ) do |sub_batch|
+ sub_batch
+ .joins(CLUSTER_AGENTS_JOIN)
+ .update_all('casted_cluster_agent_id = CAST(vulnerability_reads.cluster_agent_id AS bigint)')
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/backfill_work_item_type_id_for_issues.rb b/lib/gitlab/background_migration/backfill_work_item_type_id_for_issues.rb
index a16efa4222b..32962f2bb89 100644
--- a/lib/gitlab/background_migration/backfill_work_item_type_id_for_issues.rb
+++ b/lib/gitlab/background_migration/backfill_work_item_type_id_for_issues.rb
@@ -20,7 +20,7 @@ module Gitlab
parent_batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id, base_type)
parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch|
- first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first
+ first, last = sub_batch.pick(Arel.sql('min(id), max(id)'))
# The query need to be reconstructed because .each_batch modifies the default scope
# See: https://gitlab.com/gitlab-org/gitlab/-/issues/330510
diff --git a/lib/gitlab/background_migration/batched_migration_job.rb b/lib/gitlab/background_migration/batched_migration_job.rb
index c47b1735ccf..11d15804344 100644
--- a/lib/gitlab/background_migration/batched_migration_job.rb
+++ b/lib/gitlab/background_migration/batched_migration_job.rb
@@ -3,22 +3,62 @@
module Gitlab
module BackgroundMigration
# Base class for batched background migrations. Subclasses should implement the `#perform`
- # method as the entry point for the job's execution, which will be called with the migration
- # arguments (if any).
+ # method as the entry point for the job's execution.
+ #
+ # Job arguments needed must be defined explicitly,
+ # see https://docs.gitlab.com/ee/development/database/batched_background_migrations.html#job-arguments.
class BatchedMigrationJob
include Gitlab::Database::DynamicModelHelpers
- def initialize(start_id:, end_id:, batch_table:, batch_column:, sub_batch_size:, pause_ms:, connection:)
+ def initialize(
+ start_id:, end_id:, batch_table:, batch_column:, sub_batch_size:, pause_ms:, job_arguments: [], connection:
+ )
+
@start_id = start_id
@end_id = end_id
@batch_table = batch_table
@batch_column = batch_column
@sub_batch_size = sub_batch_size
@pause_ms = pause_ms
+ @job_arguments = job_arguments
@connection = connection
end
- def perform(*job_arguments)
+ def self.generic_instance(batch_table:, batch_column:, job_arguments: [], connection:)
+ new(
+ batch_table: batch_table, batch_column: batch_column,
+ job_arguments: job_arguments, connection: connection,
+ start_id: 0, end_id: 0, sub_batch_size: 0, pause_ms: 0
+ )
+ end
+
+ def self.job_arguments_count
+ 0
+ end
+
+ def self.job_arguments(*args)
+ args.each.with_index do |arg, index|
+ define_method(arg) do
+ @job_arguments[index]
+ end
+ end
+
+ define_singleton_method(:job_arguments_count) do
+ args.count
+ end
+ end
+
+ def self.scope_to(scope)
+ define_method(:filter_batch) do |relation|
+ instance_exec(relation, &scope)
+ end
+ end
+
+ def filter_batch(relation)
+ relation
+ end
+
+ def perform
raise NotImplementedError, "subclasses of #{self.class.name} must implement #{__method__}"
end
@@ -33,9 +73,10 @@ module Gitlab
def each_sub_batch(operation_name: :default, batching_arguments: {}, batching_scope: nil)
all_batching_arguments = { column: batch_column, of: sub_batch_size }.merge(batching_arguments)
- parent_relation = parent_batch_relation(batching_scope)
+ relation = filter_batch(base_relation)
+ sub_batch_relation = filter_sub_batch(relation, batching_scope)
- parent_relation.each_batch(**all_batching_arguments) do |relation|
+ sub_batch_relation.each_batch(**all_batching_arguments) do |relation|
batch_metrics.instrument_operation(operation_name) do
yield relation
end
@@ -45,9 +86,13 @@ module Gitlab
end
def distinct_each_batch(operation_name: :default, batching_arguments: {})
+ if base_relation != filter_batch(base_relation)
+ raise 'distinct_each_batch can not be used when additional filters are defined with scope_to'
+ end
+
all_batching_arguments = { column: batch_column, of: sub_batch_size }.merge(batching_arguments)
- parent_batch_relation.distinct_each_batch(**all_batching_arguments) do |relation|
+ base_relation.distinct_each_batch(**all_batching_arguments) do |relation|
batch_metrics.instrument_operation(operation_name) do
yield relation
end
@@ -56,13 +101,15 @@ module Gitlab
end
end
- def parent_batch_relation(batching_scope = nil)
- parent_relation = define_batchable_model(batch_table, connection: connection)
+ def base_relation
+ define_batchable_model(batch_table, connection: connection)
.where(batch_column => start_id..end_id)
+ end
- return parent_relation unless batching_scope
+ def filter_sub_batch(relation, batching_scope = nil)
+ return relation unless batching_scope
- batching_scope.call(parent_relation)
+ batching_scope.call(relation)
end
end
end
diff --git a/lib/gitlab/background_migration/batching_strategies/backfill_project_namespace_per_group_batching_strategy.rb b/lib/gitlab/background_migration/batching_strategies/backfill_project_namespace_per_group_batching_strategy.rb
index 68be42dc0a0..12fd9ae7161 100644
--- a/lib/gitlab/background_migration/batching_strategies/backfill_project_namespace_per_group_batching_strategy.rb
+++ b/lib/gitlab/background_migration/batching_strategies/backfill_project_namespace_per_group_batching_strategy.rb
@@ -25,7 +25,7 @@ module Gitlab
relation = model_class.where(projects_table[:namespace_id].in(hierarchy_cte_sql)).where("#{quoted_column_name} >= ?", batch_min_value)
relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
- next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first
+ next_batch_bounds = batch.pick(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})"))
break
end
diff --git a/lib/gitlab/background_migration/batching_strategies/backfill_vulnerability_reads_cluster_agent_batching_strategy.rb b/lib/gitlab/background_migration/batching_strategies/backfill_vulnerability_reads_cluster_agent_batching_strategy.rb
new file mode 100644
index 00000000000..f0d015198dc
--- /dev/null
+++ b/lib/gitlab/background_migration/batching_strategies/backfill_vulnerability_reads_cluster_agent_batching_strategy.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ module BatchingStrategies
+ # Batching class to use for back-filling vulnerability_read's casted_cluster_agent_id from cluster_agent_id.
+ # Batches will be scoped to records where the report_type belongs to cluster_image_scanning.
+ #
+ # If no more batches exist in the table, returns nil.
+ class BackfillVulnerabilityReadsClusterAgentBatchingStrategy < PrimaryKeyBatchingStrategy
+ CLUSTER_IMAGE_SCANNING_REPORT_TYPE = 7
+
+ def apply_additional_filters(relation, job_arguments: [], job_class: nil)
+ relation.where(report_type: CLUSTER_IMAGE_SCANNING_REPORT_TYPE)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/batching_strategies/loose_index_scan_batching_strategy.rb b/lib/gitlab/background_migration/batching_strategies/loose_index_scan_batching_strategy.rb
index 5cad9d2e3c4..fc08d2b0ab6 100644
--- a/lib/gitlab/background_migration/batching_strategies/loose_index_scan_batching_strategy.rb
+++ b/lib/gitlab/background_migration/batching_strategies/loose_index_scan_batching_strategy.rb
@@ -24,7 +24,7 @@ module Gitlab
next_batch_bounds = nil
relation.distinct_each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
- next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first
+ next_batch_bounds = batch.pick(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})"))
break
end
diff --git a/lib/gitlab/background_migration/batching_strategies/primary_key_batching_strategy.rb b/lib/gitlab/background_migration/batching_strategies/primary_key_batching_strategy.rb
index c2f59bf9c76..1ffa4a052e5 100644
--- a/lib/gitlab/background_migration/batching_strategies/primary_key_batching_strategy.rb
+++ b/lib/gitlab/background_migration/batching_strategies/primary_key_batching_strategy.rb
@@ -24,11 +24,19 @@ module Gitlab
quoted_column_name = model_class.connection.quote_column_name(column_name)
relation = model_class.where("#{quoted_column_name} >= ?", batch_min_value)
+
+ if job_class
+ relation = filter_batch(relation,
+ table_name: table_name, column_name: column_name,
+ job_class: job_class, job_arguments: job_arguments
+ )
+ end
+
relation = apply_additional_filters(relation, job_arguments: job_arguments, job_class: job_class)
next_batch_bounds = nil
relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
- next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first
+ next_batch_bounds = batch.pick(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})"))
break
end
@@ -36,13 +44,27 @@ module Gitlab
next_batch_bounds
end
+ # Deprecated
+ #
+ # Use `scope_to` to define additional filters on the migration job class.
+ #
+ # see https://docs.gitlab.com/ee/development/database/batched_background_migrations.html#adding-additional-filters.
def apply_additional_filters(relation, job_arguments: [], job_class: nil)
- if job_class.respond_to?(:batching_scope)
- return job_class.batching_scope(relation, job_arguments: job_arguments)
- end
-
relation
end
+
+ private
+
+ def filter_batch(relation, table_name:, column_name:, job_class:, job_arguments: [])
+ return relation unless job_class.respond_to?(:generic_instance)
+
+ job = job_class.generic_instance(
+ batch_table: table_name, batch_column: column_name,
+ job_arguments: job_arguments, connection: connection
+ )
+
+ job.filter_batch(relation)
+ end
end
end
end
diff --git a/lib/gitlab/background_migration/cleanup_orphaned_lfs_objects_projects.rb b/lib/gitlab/background_migration/cleanup_orphaned_lfs_objects_projects.rb
index cb9b0e88ef4..4da120769a0 100644
--- a/lib/gitlab/background_migration/cleanup_orphaned_lfs_objects_projects.rb
+++ b/lib/gitlab/background_migration/cleanup_orphaned_lfs_objects_projects.rb
@@ -62,7 +62,7 @@ module Gitlab
batch = LfsObjectsProject.where(id: start_id..end_id)
batch.each_batch(of: SUB_BATCH_SIZE) do |sub_batch|
- first, last = sub_batch.pluck(Arel.sql('min(lfs_objects_projects.id), max(lfs_objects_projects.id)')).first
+ first, last = sub_batch.pick(Arel.sql('min(lfs_objects_projects.id), max(lfs_objects_projects.id)'))
lfs_objects_without_association =
LfsObjectsProject
diff --git a/lib/gitlab/background_migration/copy_ci_builds_columns_to_security_scans.rb b/lib/gitlab/background_migration/copy_ci_builds_columns_to_security_scans.rb
deleted file mode 100644
index 107ac9b0c3b..00000000000
--- a/lib/gitlab/background_migration/copy_ci_builds_columns_to_security_scans.rb
+++ /dev/null
@@ -1,44 +0,0 @@
-# frozen_string_literal: true
-# rubocop:disable Style/Documentation
-
-module Gitlab
- module BackgroundMigration
- class CopyCiBuildsColumnsToSecurityScans
- extend ::Gitlab::Utils::Override
-
- UPDATE_BATCH_SIZE = 500
-
- def perform(start_id, stop_id)
- (start_id..stop_id).step(UPDATE_BATCH_SIZE).each do |offset|
- batch_start = offset
- batch_stop = offset + UPDATE_BATCH_SIZE - 1
-
- ActiveRecord::Base.connection.execute <<~SQL
- UPDATE
- security_scans
- SET
- project_id = ci_builds.project_id,
- pipeline_id = ci_builds.commit_id
- FROM ci_builds
- WHERE ci_builds.type='Ci::Build'
- AND ci_builds.id=security_scans.build_id
- AND security_scans.id BETWEEN #{Integer(batch_start)} AND #{Integer(batch_stop)}
- SQL
- end
-
- mark_job_as_succeeded(start_id, stop_id)
- rescue StandardError => error
- Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
- end
-
- private
-
- def mark_job_as_succeeded(*arguments)
- Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
- 'CopyCiBuildsColumnsToSecurityScans',
- arguments
- )
- end
- end
- end
-end
diff --git a/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb b/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb
index 826845935b8..15e54431a44 100644
--- a/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb
+++ b/lib/gitlab/background_migration/copy_column_using_background_migration_job.rb
@@ -14,7 +14,9 @@ module Gitlab
# - The table that is migrated does _not_ need `id` as the primary key
# We use the provided primary_key column to perform the update.
class CopyColumnUsingBackgroundMigrationJob < BatchedMigrationJob
- def perform(copy_from, copy_to)
+ job_arguments :copy_from, :copy_to
+
+ def perform
assignment_clauses = build_assignment_clauses(copy_from, copy_to)
each_sub_batch(operation_name: :update_all) do |relation|
diff --git a/lib/gitlab/background_migration/disable_legacy_open_source_license_for_no_issues_no_repo_projects.rb b/lib/gitlab/background_migration/disable_legacy_open_source_license_for_no_issues_no_repo_projects.rb
new file mode 100644
index 00000000000..019c3d15b3e
--- /dev/null
+++ b/lib/gitlab/background_migration/disable_legacy_open_source_license_for_no_issues_no_repo_projects.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Set `project_settings.legacy_open_source_license_available` to false for public projects with no issues & no repo
+ class DisableLegacyOpenSourceLicenseForNoIssuesNoRepoProjects < ::Gitlab::BackgroundMigration::BatchedMigrationJob
+ PUBLIC = 20
+
+ # Migration only version of `project_settings` table
+ class ProjectSetting < ApplicationRecord
+ self.table_name = 'project_settings'
+ end
+
+ def perform
+ each_sub_batch(
+ operation_name: :disable_legacy_open_source_license_for_no_issues_no_repo_projects,
+ batching_scope: ->(relation) { relation.where(visibility_level: PUBLIC) }
+ ) do |sub_batch|
+ no_issues_no_repo_projects =
+ sub_batch
+ .joins('LEFT OUTER JOIN project_statistics ON project_statistics.project_id = projects.id')
+ .joins('LEFT OUTER JOIN project_settings ON project_settings.project_id = projects.id')
+ .joins('LEFT OUTER JOIN issues ON issues.project_id = projects.id')
+ .where('project_statistics.repository_size' => 0,
+ 'project_settings.legacy_open_source_license_available' => true)
+ .group('projects.id')
+ .having('COUNT(issues.id) = 0')
+
+ ProjectSetting
+ .where(project_id: no_issues_no_repo_projects)
+ .update_all(legacy_open_source_license_available: false)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/disable_legacy_open_source_license_for_one_member_no_repo_projects.rb b/lib/gitlab/background_migration/disable_legacy_open_source_license_for_one_member_no_repo_projects.rb
new file mode 100644
index 00000000000..3a9049b1f19
--- /dev/null
+++ b/lib/gitlab/background_migration/disable_legacy_open_source_license_for_one_member_no_repo_projects.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # Set `project_settings.legacy_open_source_license_available` to false for public projects with 1 member and no repo
+ class DisableLegacyOpenSourceLicenseForOneMemberNoRepoProjects < ::Gitlab::BackgroundMigration::BatchedMigrationJob
+ PUBLIC = 20
+
+ # Migration only version of `project_settings` table
+ class ProjectSetting < ApplicationRecord
+ self.table_name = 'project_settings'
+ end
+
+ def perform
+ each_sub_batch(
+ operation_name: :disable_legacy_open_source_license_for_one_member_no_repo_projects,
+ batching_scope: ->(relation) { relation.where(visibility_level: PUBLIC) }
+ ) do |sub_batch|
+ one_member_no_repo_projects =
+ sub_batch
+ .joins('LEFT OUTER JOIN project_statistics ON project_statistics.project_id = projects.id')
+ .joins('LEFT OUTER JOIN project_settings ON project_settings.project_id = projects.id')
+ .joins('LEFT OUTER JOIN project_authorizations ON project_authorizations.project_id = projects.id')
+ .where('project_statistics.repository_size' => 0,
+ 'project_settings.legacy_open_source_license_available' => true)
+ .group('projects.id')
+ .having('COUNT(project_authorizations.user_id) = 1')
+
+ ProjectSetting
+ .where(project_id: one_member_no_repo_projects)
+ .update_all(legacy_open_source_license_available: false)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/background_migration/drop_invalid_security_findings.rb b/lib/gitlab/background_migration/drop_invalid_security_findings.rb
index 87551bb1b1e..000628e109c 100644
--- a/lib/gitlab/background_migration/drop_invalid_security_findings.rb
+++ b/lib/gitlab/background_migration/drop_invalid_security_findings.rb
@@ -19,7 +19,7 @@ module Gitlab
.no_uuid
ranged_query.each_batch(of: sub_batch_size) do |sub_batch|
- first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first
+ first, last = sub_batch.pick(Arel.sql('min(id), max(id)'))
# The query needs to be reconstructed because .each_batch modifies the default scope
# See: https://gitlab.com/gitlab-org/gitlab/-/issues/330510
diff --git a/lib/gitlab/background_migration/encrypt_static_object_token.rb b/lib/gitlab/background_migration/encrypt_static_object_token.rb
index a087d2529eb..e1805d40bab 100644
--- a/lib/gitlab/background_migration/encrypt_static_object_token.rb
+++ b/lib/gitlab/background_migration/encrypt_static_object_token.rb
@@ -23,7 +23,7 @@ module Gitlab
.without_static_object_token_encrypted
ranged_query.each_batch(of: BATCH_SIZE) do |sub_batch|
- first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first
+ first, last = sub_batch.pick(Arel.sql('min(id), max(id)'))
batch_query = User.unscoped
.where(id: first..last)
diff --git a/lib/gitlab/background_migration/fix_first_mentioned_in_commit_at.rb b/lib/gitlab/background_migration/fix_first_mentioned_in_commit_at.rb
index 8f785476aa0..6de2187b8e3 100644
--- a/lib/gitlab/background_migration/fix_first_mentioned_in_commit_at.rb
+++ b/lib/gitlab/background_migration/fix_first_mentioned_in_commit_at.rb
@@ -29,7 +29,7 @@ module Gitlab
def perform(start_id, end_id)
scope(start_id, end_id).each_batch(of: SUB_BATCH_SIZE, column: :issue_id) do |sub_batch|
- first, last = sub_batch.pluck(Arel.sql('min(issue_id), max(issue_id)')).first
+ first, last = sub_batch.pick(Arel.sql('min(issue_id), max(issue_id)'))
# The query needs to be reconstructed because .each_batch modifies the default scope
# See: https://gitlab.com/gitlab-org/gitlab/-/issues/330510
diff --git a/lib/gitlab/background_migration/fix_projects_without_prometheus_service.rb b/lib/gitlab/background_migration/fix_projects_without_prometheus_service.rb
index 496ec0bd0a1..97a9913fa74 100644
--- a/lib/gitlab/background_migration/fix_projects_without_prometheus_service.rb
+++ b/lib/gitlab/background_migration/fix_projects_without_prometheus_service.rb
@@ -137,7 +137,7 @@ module Gitlab
def create_sql(from_id, to_id)
<<~SQL
WITH created_records AS #{Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
- INSERT INTO services (project_id, #{DEFAULTS.keys.map { |key| %("#{key}")}.join(',')}, created_at, updated_at)
+ INSERT INTO services (project_id, #{DEFAULTS.keys.map { |key| %("#{key}") }.join(',')}, created_at, updated_at)
#{select_insert_values_sql(from_id, to_id)}
RETURNING *
)
diff --git a/lib/gitlab/background_migration/migrate_shared_vulnerability_scanners.rb b/lib/gitlab/background_migration/migrate_shared_vulnerability_scanners.rb
new file mode 100644
index 00000000000..bea0120f093
--- /dev/null
+++ b/lib/gitlab/background_migration/migrate_shared_vulnerability_scanners.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
+ # rubocop: disable Style/Documentation
+ class MigrateSharedVulnerabilityScanners < BatchedMigrationJob
+ def perform
+ end
+ end
+ # rubocop: enable Style/Documentation
+ end
+end
+
+# rubocop: disable Layout/LineLength
+Gitlab::BackgroundMigration::MigrateSharedVulnerabilityScanners.prepend_mod_with("Gitlab::BackgroundMigration::MigrateSharedVulnerabilityScanners")
+# rubocop: enable Layout/LineLength
diff --git a/lib/gitlab/background_migration/populate_status_column_of_security_scans.rb b/lib/gitlab/background_migration/populate_status_column_of_security_scans.rb
deleted file mode 100644
index 9740bcaa86b..00000000000
--- a/lib/gitlab/background_migration/populate_status_column_of_security_scans.rb
+++ /dev/null
@@ -1,13 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
- module BackgroundMigration
- class PopulateStatusColumnOfSecurityScans # rubocop:disable Style/Documentation
- def perform(_start_id, _end_id)
- # no-op
- end
- end
- end
-end
-
-Gitlab::BackgroundMigration::PopulateStatusColumnOfSecurityScans.prepend_mod
diff --git a/lib/gitlab/background_migration/populate_vulnerability_reads.rb b/lib/gitlab/background_migration/populate_vulnerability_reads.rb
index 5e6475a3d1a..656c62d9ee5 100644
--- a/lib/gitlab/background_migration/populate_vulnerability_reads.rb
+++ b/lib/gitlab/background_migration/populate_vulnerability_reads.rb
@@ -10,7 +10,7 @@ module Gitlab
def perform(start_id, end_id, sub_batch_size)
vulnerability_model.where(id: start_id..end_id).each_batch(of: sub_batch_size) do |sub_batch|
- first, last = sub_batch.pluck(Arel.sql('min(id), max(id)')).first
+ first, last = sub_batch.pick(Arel.sql('min(id), max(id)'))
connection.execute(insert_query(first, last))
sleep PAUSE_SECONDS
diff --git a/lib/gitlab/background_migration/project_namespaces/backfill_project_namespaces.rb b/lib/gitlab/background_migration/project_namespaces/backfill_project_namespaces.rb
index 2b27bad3497..845a3c16bbe 100644
--- a/lib/gitlab/background_migration/project_namespaces/backfill_project_namespaces.rb
+++ b/lib/gitlab/background_migration/project_namespaces/backfill_project_namespaces.rb
@@ -105,9 +105,11 @@ module Gitlab
.joins("INNER JOIN namespaces n2 ON namespaces.parent_id = n2.id")
.select("namespaces.id as project_namespace_id, n2.traversal_ids")
+ # Some customers have a bigint namespaces.id column, which makes array_append(integer[], bigint) fail,
+ # so we explicitly cast both arguments to compatible types.
ApplicationRecord.connection.execute <<~SQL
UPDATE namespaces
- SET traversal_ids = array_append(project_namespaces.traversal_ids, project_namespaces.project_namespace_id)
+ SET traversal_ids = array_append(project_namespaces.traversal_ids::bigint[], project_namespaces.project_namespace_id::bigint)
FROM (#{namespaces.to_sql}) as project_namespaces(project_namespace_id, traversal_ids)
WHERE id = project_namespaces.project_namespace_id
SQL
diff --git a/lib/gitlab/background_migration/update_jira_tracker_data_deployment_type_based_on_url.rb b/lib/gitlab/background_migration/update_jira_tracker_data_deployment_type_based_on_url.rb
index bba1ca26b35..e9a38916999 100644
--- a/lib/gitlab/background_migration/update_jira_tracker_data_deployment_type_based_on_url.rb
+++ b/lib/gitlab/background_migration/update_jira_tracker_data_deployment_type_based_on_url.rb
@@ -1,42 +1,74 @@
# frozen_string_literal: true
# rubocop: disable Style/Documentation
-class Gitlab::BackgroundMigration::UpdateJiraTrackerDataDeploymentTypeBasedOnUrl
- # rubocop: disable Gitlab/NamespacedClass
- class JiraTrackerData < ActiveRecord::Base
- self.table_name = "jira_tracker_data"
- self.inheritance_column = :_type_disabled
+module Gitlab
+ module BackgroundMigration
+ class UpdateJiraTrackerDataDeploymentTypeBasedOnUrl < Gitlab::BackgroundMigration::BatchedMigrationJob
+ # rubocop: disable Gitlab/NamespacedClass
+ class JiraTrackerData < ActiveRecord::Base
+ self.table_name = "jira_tracker_data"
+ self.inheritance_column = :_type_disabled
- include ::Integrations::BaseDataFields
- attr_encrypted :url, encryption_options
- attr_encrypted :api_url, encryption_options
+ include ::Integrations::BaseDataFields
+ attr_encrypted :url, encryption_options
+ attr_encrypted :api_url, encryption_options
- enum deployment_type: { unknown: 0, server: 1, cloud: 2 }, _prefix: :deployment
- end
- # rubocop: enable Gitlab/NamespacedClass
+ enum deployment_type: { unknown: 0, server: 1, cloud: 2 }, _prefix: :deployment
+ end
+ # rubocop: enable Gitlab/NamespacedClass
- # https://rubular.com/r/uwgK7k9KH23efa
- JIRA_CLOUD_REGEX = %r{^https?://[A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?\.atlassian\.net$}ix.freeze
+ # https://rubular.com/r/uwgK7k9KH23efa
+ JIRA_CLOUD_REGEX = %r{^https?://[A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?\.atlassian\.net$}ix.freeze
- # rubocop: disable CodeReuse/ActiveRecord
- def perform(start_id, end_id)
- trackers_data = JiraTrackerData
- .where(deployment_type: 'unknown')
- .where(id: start_id..end_id)
+ def perform
+ cloud = []
+ server = []
+ unknown = []
- cloud, server = trackers_data.partition { |tracker_data| tracker_data.url.match?(JIRA_CLOUD_REGEX) }
+ trackers_data.each do |tracker_data|
+ client_url = tracker_data.api_url.presence || tracker_data.url
- cloud_mappings = cloud.each_with_object({}) do |tracker_data, hash|
- hash[tracker_data] = { deployment_type: 2 }
- end
+ if client_url.blank?
+ unknown << tracker_data
+ elsif client_url.match?(JIRA_CLOUD_REGEX)
+ cloud << tracker_data
+ else
+ server << tracker_data
+ end
+ end
- server_mapppings = server.each_with_object({}) do |tracker_data, hash|
- hash[tracker_data] = { deployment_type: 1 }
- end
+ cloud_mappings = cloud.each_with_object({}) do |tracker_data, hash|
+ hash[tracker_data] = { deployment_type: 2 }
+ end
+
+ server_mappings = server.each_with_object({}) do |tracker_data, hash|
+ hash[tracker_data] = { deployment_type: 1 }
+ end
+
+ unknown_mappings = unknown.each_with_object({}) do |tracker_data, hash|
+ hash[tracker_data] = { deployment_type: 0 }
+ end
- mappings = cloud_mappings.merge(server_mapppings)
+ mappings = cloud_mappings.merge(server_mappings, unknown_mappings)
- ::Gitlab::Database::BulkUpdate.execute(%i[deployment_type], mappings)
+ update_records(mappings)
+ end
+
+ private
+
+ def update_records(mappings)
+ return if mappings.empty?
+
+ ::Gitlab::Database::BulkUpdate.execute(%i[deployment_type], mappings)
+ end
+
+ # rubocop: disable CodeReuse/ActiveRecord
+ def trackers_data
+ @trackers_data ||= JiraTrackerData
+ .where(deployment_type: 'unknown')
+ .where(batch_column => start_id..end_id)
+ end
+ # rubocop: enable CodeReuse/ActiveRecord
+ end
end
- # rubocop: enable CodeReuse/ActiveRecord
end
diff --git a/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb b/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb
index 38932e52bb0..b61f2ee7f4c 100644
--- a/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb
+++ b/lib/gitlab/background_migration/update_timelogs_null_spent_at.rb
@@ -12,7 +12,7 @@ module Gitlab
define_batchable_model('timelogs', connection: connection)
.where(spent_at: nil, id: start_id..stop_id)
.each_batch(of: 100) do |subbatch|
- batch_start, batch_end = subbatch.pluck('min(id), max(id)').first
+ batch_start, batch_end = subbatch.pick('min(id), max(id)')
update_timelogs(batch_start, batch_end)
end