1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
|
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class ProjectForeignKeysWithCascadingDeletes < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
CONCURRENCY = 4
disable_ddl_transaction!
# The tables/columns for which to remove orphans and add foreign keys. Order
# matters as some tables/columns should be processed before others.
TABLES = [
[:boards, :projects, :project_id],
[:lists, :labels, :label_id],
[:lists, :boards, :board_id],
[:services, :projects, :project_id],
[:forked_project_links, :projects, :forked_to_project_id],
[:merge_requests, :projects, :target_project_id],
[:labels, :projects, :project_id],
[:issues, :projects, :project_id],
[:events, :projects, :project_id],
[:milestones, :projects, :project_id],
[:notes, :projects, :project_id],
[:snippets, :projects, :project_id],
[:web_hooks, :projects, :project_id],
[:protected_branch_merge_access_levels, :protected_branches, :protected_branch_id],
[:protected_branch_push_access_levels, :protected_branches, :protected_branch_id],
[:protected_branches, :projects, :project_id],
[:protected_tags, :projects, :project_id],
[:deploy_keys_projects, :projects, :project_id],
[:users_star_projects, :projects, :project_id],
[:releases, :projects, :project_id],
[:project_group_links, :projects, :project_id],
[:pages_domains, :projects, :project_id],
[:todos, :projects, :project_id],
[:project_import_data, :projects, :project_id],
[:project_features, :projects, :project_id],
[:ci_builds, :projects, :project_id],
[:ci_pipelines, :projects, :project_id],
[:ci_runner_projects, :projects, :project_id],
[:ci_triggers, :projects, :project_id],
[:environments, :projects, :project_id],
[:deployments, :projects, :project_id]
]
def up
# These existing foreign keys don't have an "ON DELETE CASCADE" clause.
remove_foreign_key_without_error(:boards, :project_id)
remove_foreign_key_without_error(:lists, :label_id)
remove_foreign_key_without_error(:lists, :board_id)
remove_foreign_key_without_error(:protected_branch_merge_access_levels,
:protected_branch_id)
remove_foreign_key_without_error(:protected_branch_push_access_levels,
:protected_branch_id)
remove_orphaned_rows
add_foreign_keys
# These columns are not indexed yet, meaning a cascading delete would take
# forever.
add_concurrent_index(:project_group_links, :project_id)
add_concurrent_index(:pages_domains, :project_id)
end
def down
TABLES.each do |(source, _, column)|
remove_foreign_key_without_error(source, column)
end
add_concurrent_foreign_key(:boards, :projects, column: :project_id)
add_concurrent_foreign_key(:lists, :labels, column: :label_id)
add_concurrent_foreign_key(:lists, :boards, column: :board_id)
add_concurrent_foreign_key(:protected_branch_merge_access_levels,
:protected_branches,
column: :protected_branch_id)
add_concurrent_foreign_key(:protected_branch_push_access_levels,
:protected_branches,
column: :protected_branch_id)
remove_index_without_error(:project_group_links, :project_id)
remove_index_without_error(:pages_domains, :project_id)
end
def add_foreign_keys
TABLES.each do |(source, target, column)|
add_concurrent_foreign_key(source, target, column: column)
end
end
# Removes orphans from various tables concurrently.
def remove_orphaned_rows
Gitlab::Database.with_connection_pool(CONCURRENCY) do |pool|
queues = queues_for_rows(TABLES)
threads = queues.map do |queue|
Thread.new do
pool.with_connection do |connection|
Thread.current[:foreign_key_connection] = connection
# Disables statement timeouts for the current connection. This is
# necessary as removing of orphaned data might otherwise exceed the
# statement timeout.
disable_statement_timeout
remove_orphans(*queue.pop) until queue.empty?
steal_from_queues(queues - [queue])
end
end
end
threads.each(&:join)
end
end
def steal_from_queues(queues)
loop do
stolen = false
queues.each do |queue|
# Stealing is racy so it's possible a pop might be called on an
# already-empty queue.
begin
remove_orphans(*queue.pop(true))
stolen = true
rescue ThreadError
end
end
break unless stolen
end
end
def remove_orphans(source, target, column)
quoted_source = quote_table_name(source)
quoted_target = quote_table_name(target)
quoted_column = quote_column_name(column)
execute <<-EOF.strip_heredoc
DELETE FROM #{quoted_source}
WHERE NOT EXISTS (
SELECT true
FROM #{quoted_target}
WHERE #{quoted_target}.id = #{quoted_source}.#{quoted_column}
)
AND #{quoted_source}.#{quoted_column} IS NOT NULL
EOF
end
def remove_foreign_key_without_error(table, column)
remove_foreign_key(table, column: column)
rescue ArgumentError
end
def remove_index_without_error(table, column)
remove_concurrent_index(table, column)
rescue ArgumentError
end
def connection
# Rails memoizes connection objects, but this causes them to be shared
# amongst threads; we don't want that.
Thread.current[:foreign_key_connection] || ActiveRecord::Base.connection
end
def queues_for_rows(rows)
queues = Array.new(CONCURRENCY) { Queue.new }
slice_size = rows.length / CONCURRENCY
# Divide all the tuples as evenly as possible amongst the queues.
rows.each_slice(slice_size).each_with_index do |slice, index|
bucket = index % CONCURRENCY
slice.each do |row|
queues[bucket] << row
end
end
queues
end
end
|