1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
module ProjectNamespaces
# Back-fill project namespaces for projects that do not yet have a namespace.
#
# TODO: remove this comment when an actual backfill migration is added.
#
# This is first being added without an actual migration as we need to initially test
# if backfilling project namespaces affects performance in any significant way.
# rubocop: disable Metrics/ClassLength
class BackfillProjectNamespaces
# Number of project ids handled per slice while backfilling or cleaning up.
BATCH_SIZE = 100
# Number of project ids covered by each DELETE when project namespace rows are removed.
DELETE_BATCH_SIZE = 10
# Value written to `namespaces.type` for inserted rows and matched against it when deleting.
PROJECT_NAMESPACE_STI_NAME = 'Project'
# Shorthand for the models module this class queries through.
# NOTE(review): presumably migration-local copies of the application models - confirm.
IsolatedModels = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models
# Entry point of the job: backfills (or reverts) project namespaces for one range of project ids.
#
# @param start_id [Integer] first project id of the range (inclusive)
# @param end_id [Integer] last project id of the range (inclusive)
# @param namespace_id [Integer, nil] when present, only projects inside this group hierarchy are handled
# @param migration_type [String] 'up' to create project namespaces, 'down' to remove them
# @raise [RuntimeError] when migration_type is neither 'up' nor 'down'
def perform(start_id, end_id, namespace_id, migration_type = 'up')
  load_project_ids(start_id, end_id, namespace_id)

  case migration_type
  when 'up' then backfill_project_namespaces(namespace_id)
  when 'down' then cleanup_backfilled_project_namespaces(namespace_id)
  else raise "Unknown migration type"
  end

  # Only reached for a recognised direction, so migration_type is 'up' or 'down' here.
  mark_job_as_succeeded(start_id, end_id, namespace_id, migration_type)
end
# Everything defined below this point is private to the class.
private
# Ids of the projects selected for the current job; assigned by #load_project_ids.
attr_accessor :project_ids
# Creates a project namespace for every loaded project and links the two, in slices of BATCH_SIZE.
#
# @param namespace_id [Integer, nil] not used here; #perform has already narrowed `project_ids`
#   through #load_project_ids before calling this method
def backfill_project_namespaces(namespace_id)
  project_ids.each_slice(BATCH_SIZE) do |batch_ids|
    # We need to lock these project records for the period when we create project namespaces
    # and link them to projects so that if a project is modified in the time between creating
    # project namespaces `batch_insert_namespaces` and linking them to projects `batch_update_projects`
    # we do not get them out of sync.
    #
    # see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/72527#note_730679469
    Project.transaction do
      # `lock!` only sets the lock clause on the (lazy) relation; `load` is what actually runs
      # the SELECT ... FOR UPDATE, so without it no row would be locked.
      Project.where(id: batch_ids).select(:id).lock!('FOR UPDATE').load

      batch_insert_namespaces(batch_ids)
      batch_update_projects(batch_ids)
    end

    # Runs after the transaction above has finished for this slice.
    batch_update_project_namespaces_traversal_ids(batch_ids)
  end
end
# Reverts the backfill for the loaded projects, in slices of BATCH_SIZE.
#
# @param namespace_id [Integer, nil] not read by this method
def cleanup_backfilled_project_namespaces(namespace_id)
  project_ids.each_slice(BATCH_SIZE) do |batch_ids|
    # IMPORTANT: the order matters. projects.project_namespace_id is nullified first so that
    # deleting the namespaces rows afterwards cannot remove projects through FK/triggers.
    nullify_project_namespaces_in_projects(batch_ids)
    delete_project_namespace_records(batch_ids)
  end
end
# Inserts one `namespaces` row per given project by feeding a SELECT over `projects`
# into a single INSERT statement; conflicting rows are skipped (ON CONFLICT DO NOTHING).
#
# @param project_ids [Array<Integer>] ids of the projects to create namespaces for
def batch_insert_namespaces(project_ids)
  # Column order must line up with the column list of the INSERT below.
  selected_columns = [
    'projects.id',
    'projects.name',
    'projects.path',
    'projects.namespace_id',
    'projects.visibility_level',
    'shared_runners_enabled',
    "'#{PROJECT_NAMESPACE_STI_NAME}'",
    'now()',
    'now()'
  ].join(', ')

  source_rows = IsolatedModels::Project.where(id: project_ids).select(selected_columns)

  ActiveRecord::Base.connection.execute(<<~SQL)
    INSERT INTO namespaces (tmp_project_id, name, path, parent_id, visibility_level, shared_runners_enabled, type, created_at, updated_at)
    #{source_rows.to_sql}
    ON CONFLICT DO NOTHING;
  SQL
end
# Points `projects.project_namespace_id` at the namespace row whose `tmp_project_id`
# matches the project, touching only rows whose value would actually change.
#
# @param project_ids [Array<Integer>] ids of the projects to link
def batch_update_projects(project_ids)
  namespace_links = IsolatedModels::Project
    .where(id: project_ids)
    .joins("INNER JOIN namespaces ON projects.id = namespaces.tmp_project_id")
    .select("namespaces.id, namespaces.tmp_project_id")

  # Keyword (possibly empty) placed between AS and the CTE body.
  materialized = ::Gitlab::Database::AsWithMaterialized.materialized_if_supported

  ActiveRecord::Base.connection.execute(<<~SQL)
    WITH cte(project_namespace_id, project_id) AS #{materialized} (
    #{namespace_links.to_sql}
    )
    UPDATE projects
    SET project_namespace_id = cte.project_namespace_id
    FROM cte
    WHERE id = cte.project_id AND projects.project_namespace_id IS DISTINCT FROM cte.project_namespace_id
  SQL
end
# Sets `traversal_ids` of each project namespace to its parent's `traversal_ids`
# with the project namespace's own id appended.
#
# NOTE(review): this queries through `Namespace`, while the sibling methods go through
# `IsolatedModels` - confirm that is intentional.
#
# @param project_ids [Array<Integer>] ids of the projects whose namespaces are updated
def batch_update_project_namespaces_traversal_ids(project_ids)
  parent_paths = Namespace
    .where(tmp_project_id: project_ids)
    .joins("INNER JOIN namespaces n2 ON namespaces.parent_id = n2.id")
    .select("namespaces.id as project_namespace_id, n2.traversal_ids")

  ActiveRecord::Base.connection.execute(<<~SQL)
    UPDATE namespaces
    SET traversal_ids = array_append(project_namespaces.traversal_ids, project_namespaces.project_namespace_id)
    FROM (#{parent_paths.to_sql}) as project_namespaces(project_namespace_id, traversal_ids)
    WHERE id = project_namespaces.project_namespace_id
  SQL
end
# Clears `project_namespace_id` on the given projects with a single bulk update.
#
# @param project_ids [Array<Integer>] ids of the projects to unlink
def nullify_project_namespaces_in_projects(project_ids)
  linked_projects = IsolatedModels::Project.where(id: project_ids)
  linked_projects.update_all(project_namespace_id: nil)
end
# Deletes the project namespace rows that belong to the given projects,
# DELETE_BATCH_SIZE project ids per DELETE statement.
#
# @param project_ids [Array<Integer>] ids of the projects whose namespace rows are removed
def delete_project_namespace_records(project_ids)
  project_ids.each_slice(DELETE_BATCH_SIZE) do |id_slice|
    project_namespaces = IsolatedModels::Namespace.where(type: PROJECT_NAMESPACE_STI_NAME)
    project_namespaces.where(tmp_project_id: id_slice).delete_all
  end
end
# Collects the ids of the projects this job works on and stores them in @project_ids.
#
# @param start_id [Integer] lower bound of the project id range
# @param end_id [Integer] upper bound of the project id range
# @param namespace_id [Integer, nil] when truthy, only projects whose namespace_id is in the
#   set produced by #hierarchy_cte for this id are kept
# @return [Array] the plucked project ids
def load_project_ids(start_id, end_id, namespace_id)
  table = IsolatedModels::Project.arel_table
  scope = IsolatedModels::Project.where(table[:id].between(start_id..end_id))

  if namespace_id
    hierarchy_ids = Arel::Nodes::SqlLiteral.new(hierarchy_cte(namespace_id))
    scope = scope.where(table[:namespace_id].in(hierarchy_ids))
  end

  @project_ids = scope.pluck(:id)
end
# Marks the tracked background migration jobs for this class and argument list as succeeded.
#
# @param arguments [Array] the job arguments, forwarded as a single array
def mark_job_as_succeeded(*arguments)
  job_class_name = 'BackfillProjectNamespaces'
  ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(job_class_name, arguments)
end
# Builds the SQL of a recursive CTE that selects the id of the Group namespace
# `root_namespace_id` together with the ids of all its descendant Group namespaces.
#
# @param root_namespace_id [#to_i] id of the root namespace; coerced with `to_i`, so only
#   an integer is ever interpolated into the SQL
# @return [String] the SQL text
def hierarchy_cte(root_namespace_id)
  root_id = root_namespace_id.to_i

  <<~SQL
    WITH RECURSIVE "base_and_descendants" AS (
    (
    SELECT "namespaces"."id"
    FROM "namespaces"
    WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = #{root_id}
    )
    UNION
    (
    SELECT "namespaces"."id"
    FROM "namespaces", "base_and_descendants"
    WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id"
    )
    )
    SELECT "id" FROM "base_and_descendants" AS "namespaces"
  SQL
end
end
# rubocop: enable Metrics/ClassLength
end
end
end
|