summaryrefslogtreecommitdiff
path: root/db
diff options
context:
space:
mode:
authorYorick Peterse <yorickpeterse@gmail.com>2017-05-30 13:54:59 +0200
committerYorick Peterse <yorickpeterse@gmail.com>2017-07-06 12:01:36 +0200
commitc63e3221587daf9e7464f7d2079ca8ed3111f6ff (patch)
tree386eb322d420d4ddf5a4bff514334f686b58c9fb /db
parent050eae8c4dff87fef63e79eb60d927d0171b5f7b (diff)
downloadgitlab-ce-c63e3221587daf9e7464f7d2079ca8ed3111f6ff.tar.gz
Add many foreign keys to the projects table
This removes the need for relying on Rails' "dependent" option for data removal, which is _incredibly_ slow (even when using :delete_all) when deleting large amounts of data. This also ensures data consistency is enforced on DB level and not on application level (something Rails is really bad at). This commit also includes various migrations to add foreign keys to tables that eventually point to "projects" to ensure no rows get orphaned upon removing a project.
Diffstat (limited to 'db')
-rw-r--r--db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb187
-rw-r--r--db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb40
-rw-r--r--db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb30
-rw-r--r--db/schema.rb39
4 files changed, 291 insertions, 5 deletions
diff --git a/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb b/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb
new file mode 100644
index 00000000000..3eaafac321d
--- /dev/null
+++ b/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb
@@ -0,0 +1,187 @@
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class ProjectForeignKeysWithCascadingDeletes < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ CONCURRENCY = 4
+
+ disable_ddl_transaction!
+
+ # The tables/columns for which to remove orphans and add foreign keys. Order
+ # matters as some tables/columns should be processed before others.
+ TABLES = [
+ [:boards, :projects, :project_id],
+ [:lists, :labels, :label_id],
+ [:lists, :boards, :board_id],
+ [:services, :projects, :project_id],
+ [:forked_project_links, :projects, :forked_to_project_id],
+ [:merge_requests, :projects, :target_project_id],
+ [:labels, :projects, :project_id],
+ [:issues, :projects, :project_id],
+ [:events, :projects, :project_id],
+ [:milestones, :projects, :project_id],
+ [:notes, :projects, :project_id],
+ [:snippets, :projects, :project_id],
+ [:web_hooks, :projects, :project_id],
+ [:protected_branch_merge_access_levels, :protected_branches, :protected_branch_id],
+ [:protected_branch_push_access_levels, :protected_branches, :protected_branch_id],
+ [:protected_branches, :projects, :project_id],
+ [:protected_tags, :projects, :project_id],
+ [:deploy_keys_projects, :projects, :project_id],
+ [:users_star_projects, :projects, :project_id],
+ [:releases, :projects, :project_id],
+ [:project_group_links, :projects, :project_id],
+ [:pages_domains, :projects, :project_id],
+ [:todos, :projects, :project_id],
+ [:project_import_data, :projects, :project_id],
+ [:project_features, :projects, :project_id],
+ [:ci_builds, :projects, :project_id],
+ [:ci_pipelines, :projects, :project_id],
+ [:ci_runner_projects, :projects, :project_id],
+ [:ci_triggers, :projects, :project_id],
+ [:environments, :projects, :project_id],
+ [:deployments, :projects, :project_id]
+ ]
+
+ def up
+ # These existing foreign keys don't have an "ON DELETE CASCADE" clause.
+ remove_foreign_key_without_error(:boards, :project_id)
+ remove_foreign_key_without_error(:lists, :label_id)
+ remove_foreign_key_without_error(:lists, :board_id)
+ remove_foreign_key_without_error(:protected_branch_merge_access_levels,
+ :protected_branch_id)
+
+ remove_foreign_key_without_error(:protected_branch_push_access_levels,
+ :protected_branch_id)
+
+ remove_orphaned_rows
+ add_foreign_keys
+
+ # These columns are not indexed yet, meaning a cascading delete would take
+ # forever.
+ add_concurrent_index(:project_group_links, :project_id)
+ add_concurrent_index(:pages_domains, :project_id)
+ end
+
+ def down
+ TABLES.each do |(source, _, column)|
+ remove_foreign_key_without_error(source, column)
+ end
+
+ add_concurrent_foreign_key(:boards, :projects, column: :project_id)
+ add_concurrent_foreign_key(:lists, :labels, column: :label_id)
+ add_concurrent_foreign_key(:lists, :boards, column: :board_id)
+
+ add_concurrent_foreign_key(:protected_branch_merge_access_levels,
+ :protected_branches,
+ column: :protected_branch_id)
+
+ add_concurrent_foreign_key(:protected_branch_push_access_levels,
+ :protected_branches,
+ column: :protected_branch_id)
+
+ remove_index_without_error(:project_group_links, :project_id)
+ remove_index_without_error(:pages_domains, :project_id)
+ end
+
+ def add_foreign_keys
+ TABLES.each do |(source, target, column)|
+ add_concurrent_foreign_key(source, target, column: column)
+ end
+ end
+
+ # Removes orphans from various tables concurrently.
+ def remove_orphaned_rows
+ Gitlab::Database.with_connection_pool(CONCURRENCY) do |pool|
+ queues = queues_for_rows(TABLES)
+
+ threads = queues.map do |queue|
+ Thread.new do
+ pool.with_connection do |connection|
+ Thread.current[:foreign_key_connection] = connection
+
+ # Disables statement timeouts for the current connection. This is
+ # necessary as removing of orphaned data might otherwise exceed the
+ # statement timeout.
+ disable_statement_timeout
+
+ remove_orphans(*queue.pop) until queue.empty?
+
+ steal_from_queues(queues - [queue])
+ end
+ end
+ end
+
+ threads.each(&:join)
+ end
+ end
+
+ def steal_from_queues(queues)
+ loop do
+ stolen = false
+
+ queues.each do |queue|
+ # Stealing is racy so it's possible a pop might be called on an
+ # already-empty queue.
+ begin
+ remove_orphans(*queue.pop(true))
+ stolen = true
+ rescue ThreadError
+ end
+ end
+
+ break unless stolen
+ end
+ end
+
+ def remove_orphans(source, target, column)
+ quoted_source = quote_table_name(source)
+ quoted_target = quote_table_name(target)
+ quoted_column = quote_column_name(column)
+
+ execute <<-EOF.strip_heredoc
+ DELETE FROM #{quoted_source}
+ WHERE NOT EXISTS (
+ SELECT true
+ FROM #{quoted_target}
+ WHERE #{quoted_target}.id = #{quoted_source}.#{quoted_column}
+ )
+ AND #{quoted_source}.#{quoted_column} IS NOT NULL
+ EOF
+ end
+
+ def remove_foreign_key_without_error(table, column)
+ remove_foreign_key(table, column: column)
+ rescue ArgumentError
+ end
+
+ def remove_index_without_error(table, column)
+ remove_concurrent_index(table, column)
+ rescue ArgumentError
+ end
+
+ def connection
+ # Rails memoizes connection objects, but this causes them to be shared
+ # amongst threads; we don't want that.
+ Thread.current[:foreign_key_connection] || ActiveRecord::Base.connection
+ end
+
+ def queues_for_rows(rows)
+ queues = Array.new(CONCURRENCY) { Queue.new }
+ slice_size = rows.length / CONCURRENCY
+
+ # Divide all the tuples as evenly as possible amongst the queues.
+ rows.each_slice(slice_size).each_with_index do |slice, index|
+ bucket = index % CONCURRENCY
+
+ slice.each do |row|
+ queues[bucket] << row
+ end
+ end
+
+ queues
+ end
+end
diff --git a/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb b/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb
new file mode 100644
index 00000000000..46497775527
--- /dev/null
+++ b/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb
@@ -0,0 +1,40 @@
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class CorrectProtectedBranchesForeignKeys < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ # Set this constant to true if this migration requires downtime.
+ DOWNTIME = false
+
+ disable_ddl_transaction!
+
+ def up
+ remove_foreign_key_without_error(:protected_branch_push_access_levels,
+ column: :protected_branch_id)
+
+ execute <<-EOF
+ DELETE FROM protected_branch_push_access_levels
+ WHERE NOT EXISTS (
+ SELECT true
+ FROM protected_branches
+ WHERE protected_branch_push_access_levels.protected_branch_id = protected_branches.id
+ )
+ AND protected_branch_id IS NOT NULL
+ EOF
+
+ add_concurrent_foreign_key(:protected_branch_push_access_levels,
+ :protected_branches,
+ column: :protected_branch_id)
+ end
+
+ def down
+ # Previously there was a foreign key without a CASCADING DELETE, so we'll
+ # just leave the foreign key in place.
+ end
+
+ def remove_foreign_key_without_error(*args)
+ remove_foreign_key(*args)
+ rescue ArgumentError
+ end
+end
diff --git a/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb b/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb
new file mode 100644
index 00000000000..9f524fac8a7
--- /dev/null
+++ b/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb
@@ -0,0 +1,30 @@
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class AddForeignKeyForMergeRequestDiffs < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ # Set this constant to true if this migration requires downtime.
+ DOWNTIME = false
+
+ disable_ddl_transaction!
+
+ def up
+ execute <<-EOF
+ DELETE FROM merge_request_diffs
+ WHERE NOT EXISTS (
+ SELECT true
+ FROM merge_requests
+ WHERE merge_requests.id = merge_request_diffs.merge_request_id
+ )
+ EOF
+
+ add_concurrent_foreign_key(:merge_request_diffs,
+ :merge_requests,
+ column: :merge_request_id)
+ end
+
+ def down
+ remove_foreign_key(:merge_request_diffs, column: :merge_request_id)
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 40f30a10a01..f12fdf903c5 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -971,6 +971,7 @@ ActiveRecord::Schema.define(version: 20170703102400) do
end
add_index "pages_domains", ["domain"], name: "index_pages_domains_on_domain", unique: true, using: :btree
+ add_index "pages_domains", ["project_id"], name: "index_pages_domains_on_project_id", using: :btree
create_table "personal_access_tokens", force: :cascade do |t|
t.integer "user_id", null: false
@@ -1020,6 +1021,7 @@ ActiveRecord::Schema.define(version: 20170703102400) do
end
add_index "project_group_links", ["group_id"], name: "index_project_group_links_on_group_id", using: :btree
+ add_index "project_group_links", ["project_id"], name: "index_project_group_links_on_project_id", using: :btree
create_table "project_import_data", force: :cascade do |t|
t.integer "project_id"
@@ -1528,48 +1530,75 @@ ActiveRecord::Schema.define(version: 20170703102400) do
add_index "web_hooks", ["project_id"], name: "index_web_hooks_on_project_id", using: :btree
add_index "web_hooks", ["type"], name: "index_web_hooks_on_type", using: :btree
- add_foreign_key "boards", "projects"
+ add_foreign_key "boards", "projects", name: "fk_f15266b5f9", on_delete: :cascade
add_foreign_key "chat_teams", "namespaces", on_delete: :cascade
add_foreign_key "ci_builds", "ci_pipelines", column: "auto_canceled_by_id", name: "fk_a2141b1522", on_delete: :nullify
add_foreign_key "ci_builds", "ci_stages", column: "stage_id", name: "fk_3a9eaa254d", on_delete: :cascade
+ add_foreign_key "ci_builds", "projects", name: "fk_befce0568a", on_delete: :cascade
add_foreign_key "ci_pipeline_schedules", "projects", name: "fk_8ead60fcc4", on_delete: :cascade
add_foreign_key "ci_pipeline_schedules", "users", column: "owner_id", name: "fk_9ea99f58d2", on_delete: :nullify
add_foreign_key "ci_pipelines", "ci_pipeline_schedules", column: "pipeline_schedule_id", name: "fk_3d34ab2e06", on_delete: :nullify
add_foreign_key "ci_pipelines", "ci_pipelines", column: "auto_canceled_by_id", name: "fk_262d4c2d19", on_delete: :nullify
+ add_foreign_key "ci_pipelines", "projects", name: "fk_86635dbd80", on_delete: :cascade
+ add_foreign_key "ci_runner_projects", "projects", name: "fk_4478a6f1e4", on_delete: :cascade
add_foreign_key "ci_stages", "ci_pipelines", column: "pipeline_id", name: "fk_fb57e6cc56", on_delete: :cascade
add_foreign_key "ci_stages", "projects", name: "fk_2360681d1d", on_delete: :cascade
add_foreign_key "ci_trigger_requests", "ci_triggers", column: "trigger_id", name: "fk_b8ec8b7245", on_delete: :cascade
+ add_foreign_key "ci_triggers", "projects", name: "fk_e3e63f966e", on_delete: :cascade
add_foreign_key "ci_triggers", "users", column: "owner_id", name: "fk_e8e10d1964", on_delete: :cascade
add_foreign_key "ci_variables", "projects", name: "fk_ada5eb64b3", on_delete: :cascade
add_foreign_key "container_repositories", "projects"
+ add_foreign_key "deploy_keys_projects", "projects", name: "fk_58a901ca7e", on_delete: :cascade
+ add_foreign_key "deployments", "projects", name: "fk_b9a3851b82", on_delete: :cascade
+ add_foreign_key "environments", "projects", name: "fk_d1c8c1da6a", on_delete: :cascade
+ add_foreign_key "events", "projects", name: "fk_0434b48643", on_delete: :cascade
+ add_foreign_key "forked_project_links", "projects", column: "forked_to_project_id", name: "fk_434510edb0", on_delete: :cascade
add_foreign_key "issue_assignees", "issues", name: "fk_b7d881734a", on_delete: :cascade
add_foreign_key "issue_assignees", "users", name: "fk_5e0c8d9154", on_delete: :cascade
add_foreign_key "issue_metrics", "issues", on_delete: :cascade
+ add_foreign_key "issues", "projects", name: "fk_899c8f3231", on_delete: :cascade
add_foreign_key "label_priorities", "labels", on_delete: :cascade
add_foreign_key "label_priorities", "projects", on_delete: :cascade
add_foreign_key "labels", "namespaces", column: "group_id", on_delete: :cascade
- add_foreign_key "lists", "boards"
- add_foreign_key "lists", "labels"
+ add_foreign_key "labels", "projects", name: "fk_7de4989a69", on_delete: :cascade
+ add_foreign_key "lists", "boards", name: "fk_0d3f677137", on_delete: :cascade
+ add_foreign_key "lists", "labels", name: "fk_7a5553d60f", on_delete: :cascade
add_foreign_key "merge_request_diff_files", "merge_request_diffs", on_delete: :cascade
+ add_foreign_key "merge_request_diffs", "merge_requests", name: "fk_8483f3258f", on_delete: :cascade
add_foreign_key "merge_request_metrics", "ci_pipelines", column: "pipeline_id", on_delete: :cascade
add_foreign_key "merge_request_metrics", "merge_requests", on_delete: :cascade
+ add_foreign_key "merge_requests", "projects", column: "target_project_id", name: "fk_a6963e8447", on_delete: :cascade
add_foreign_key "merge_requests_closing_issues", "issues", on_delete: :cascade
add_foreign_key "merge_requests_closing_issues", "merge_requests", on_delete: :cascade
+ add_foreign_key "milestones", "projects", name: "fk_9bd0a0c791", on_delete: :cascade
+ add_foreign_key "notes", "projects", name: "fk_99e097b079", on_delete: :cascade
add_foreign_key "oauth_openid_requests", "oauth_access_grants", column: "access_grant_id", name: "fk_oauth_openid_requests_oauth_access_grants_access_grant_id"
+ add_foreign_key "pages_domains", "projects", name: "fk_ea2f6dfc6f", on_delete: :cascade
add_foreign_key "personal_access_tokens", "users"
add_foreign_key "project_authorizations", "projects", on_delete: :cascade
add_foreign_key "project_authorizations", "users", on_delete: :cascade
+ add_foreign_key "project_features", "projects", name: "fk_18513d9b92", on_delete: :cascade
+ add_foreign_key "project_group_links", "projects", name: "fk_daa8cee94c", on_delete: :cascade
+ add_foreign_key "project_import_data", "projects", name: "fk_ffb9ee3a10", on_delete: :cascade
add_foreign_key "project_statistics", "projects", on_delete: :cascade
- add_foreign_key "protected_branch_merge_access_levels", "protected_branches"
- add_foreign_key "protected_branch_push_access_levels", "protected_branches"
+ add_foreign_key "protected_branch_merge_access_levels", "protected_branches", name: "fk_8a3072ccb3", on_delete: :cascade
+ add_foreign_key "protected_branch_push_access_levels", "protected_branches", name: "fk_9ffc86a3d9", on_delete: :cascade
+ add_foreign_key "protected_branches", "projects", name: "fk_7a9c6d93e7", on_delete: :cascade
add_foreign_key "protected_tag_create_access_levels", "namespaces", column: "group_id"
add_foreign_key "protected_tag_create_access_levels", "protected_tags"
add_foreign_key "protected_tag_create_access_levels", "users"
+ add_foreign_key "protected_tags", "projects", name: "fk_8e4af87648", on_delete: :cascade
+ add_foreign_key "releases", "projects", name: "fk_47fe2a0596", on_delete: :cascade
+ add_foreign_key "services", "projects", name: "fk_71cce407f9", on_delete: :cascade
+ add_foreign_key "snippets", "projects", name: "fk_be41fd4bb7", on_delete: :cascade
add_foreign_key "subscriptions", "projects", on_delete: :cascade
add_foreign_key "system_note_metadata", "notes", name: "fk_d83a918cb1", on_delete: :cascade
add_foreign_key "timelogs", "issues", name: "fk_timelogs_issues_issue_id", on_delete: :cascade
add_foreign_key "timelogs", "merge_requests", name: "fk_timelogs_merge_requests_merge_request_id", on_delete: :cascade
+ add_foreign_key "todos", "projects", name: "fk_45054f9c45", on_delete: :cascade
add_foreign_key "trending_projects", "projects", on_delete: :cascade
add_foreign_key "u2f_registrations", "users"
+ add_foreign_key "users_star_projects", "projects", name: "fk_22cd27ddfc", on_delete: :cascade
add_foreign_key "web_hook_logs", "web_hooks", on_delete: :cascade
+ add_foreign_key "web_hooks", "projects", name: "fk_0c8ca6d9d1", on_delete: :cascade
end