From c63e3221587daf9e7464f7d2079ca8ed3111f6ff Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 30 May 2017 13:54:59 +0200 Subject: Add many foreign keys to the projects table This removes the need for relying on Rails' "dependent" option for data removal, which is _incredibly_ slow (even when using :delete_all) when deleting large amounts of data. This also ensures data consistency is enforced on DB level and not on application level (something Rails is really bad at). This commit also includes various migrations to add foreign keys to tables that eventually point to "projects" to ensure no rows get orphaned upon removing a project. --- ..._project_foreign_keys_with_cascading_deletes.rb | 187 +++++++++++++++++++++ ...0029_correct_protected_branches_foreign_keys.rb | 40 +++++ ...2212_add_foreign_key_for_merge_request_diffs.rb | 30 ++++ db/schema.rb | 39 ++++- 4 files changed, 291 insertions(+), 5 deletions(-) create mode 100644 db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb create mode 100644 db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb create mode 100644 db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb (limited to 'db') diff --git a/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb b/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb new file mode 100644 index 00000000000..3eaafac321d --- /dev/null +++ b/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb @@ -0,0 +1,187 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class ProjectForeignKeysWithCascadingDeletes < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + CONCURRENCY = 4 + + disable_ddl_transaction! + + # The tables/columns for which to remove orphans and add foreign keys. Order + # matters as some tables/columns should be processed before others. + TABLES = [ + [:boards, :projects, :project_id], + [:lists, :labels, :label_id], + [:lists, :boards, :board_id], + [:services, :projects, :project_id], + [:forked_project_links, :projects, :forked_to_project_id], + [:merge_requests, :projects, :target_project_id], + [:labels, :projects, :project_id], + [:issues, :projects, :project_id], + [:events, :projects, :project_id], + [:milestones, :projects, :project_id], + [:notes, :projects, :project_id], + [:snippets, :projects, :project_id], + [:web_hooks, :projects, :project_id], + [:protected_branch_merge_access_levels, :protected_branches, :protected_branch_id], + [:protected_branch_push_access_levels, :protected_branches, :protected_branch_id], + [:protected_branches, :projects, :project_id], + [:protected_tags, :projects, :project_id], + [:deploy_keys_projects, :projects, :project_id], + [:users_star_projects, :projects, :project_id], + [:releases, :projects, :project_id], + [:project_group_links, :projects, :project_id], + [:pages_domains, :projects, :project_id], + [:todos, :projects, :project_id], + [:project_import_data, :projects, :project_id], + [:project_features, :projects, :project_id], + [:ci_builds, :projects, :project_id], + [:ci_pipelines, :projects, :project_id], + [:ci_runner_projects, :projects, :project_id], + [:ci_triggers, :projects, :project_id], + [:environments, :projects, :project_id], + [:deployments, :projects, :project_id] + ] + + def up + # These existing foreign keys don't have an "ON DELETE CASCADE" clause. + remove_foreign_key_without_error(:boards, :project_id) + remove_foreign_key_without_error(:lists, :label_id) + remove_foreign_key_without_error(:lists, :board_id) + remove_foreign_key_without_error(:protected_branch_merge_access_levels, + :protected_branch_id) + + remove_foreign_key_without_error(:protected_branch_push_access_levels, + :protected_branch_id) + + remove_orphaned_rows + add_foreign_keys + + # These columns are not indexed yet, meaning a cascading delete would take + # forever. + add_concurrent_index(:project_group_links, :project_id) + add_concurrent_index(:pages_domains, :project_id) + end + + def down + TABLES.each do |(source, _, column)| + remove_foreign_key_without_error(source, column) + end + + add_concurrent_foreign_key(:boards, :projects, column: :project_id) + add_concurrent_foreign_key(:lists, :labels, column: :label_id) + add_concurrent_foreign_key(:lists, :boards, column: :board_id) + + add_concurrent_foreign_key(:protected_branch_merge_access_levels, + :protected_branches, + column: :protected_branch_id) + + add_concurrent_foreign_key(:protected_branch_push_access_levels, + :protected_branches, + column: :protected_branch_id) + + remove_index_without_error(:project_group_links, :project_id) + remove_index_without_error(:pages_domains, :project_id) + end + + def add_foreign_keys + TABLES.each do |(source, target, column)| + add_concurrent_foreign_key(source, target, column: column) + end + end + + # Removes orphans from various tables concurrently. + def remove_orphaned_rows + Gitlab::Database.with_connection_pool(CONCURRENCY) do |pool| + queues = queues_for_rows(TABLES) + + threads = queues.map do |queue| + Thread.new do + pool.with_connection do |connection| + Thread.current[:foreign_key_connection] = connection + + # Disables statement timeouts for the current connection. This is + # necessary as removing of orphaned data might otherwise exceed the + # statement timeout. + disable_statement_timeout + + remove_orphans(*queue.pop) until queue.empty? + + steal_from_queues(queues - [queue]) + end + end + end + + threads.each(&:join) + end + end + + def steal_from_queues(queues) + loop do + stolen = false + + queues.each do |queue| + # Stealing is racy so it's possible a pop might be called on an + # already-empty queue. + begin + remove_orphans(*queue.pop(true)) + stolen = true + rescue ThreadError + end + end + + break unless stolen + end + end + + def remove_orphans(source, target, column) + quoted_source = quote_table_name(source) + quoted_target = quote_table_name(target) + quoted_column = quote_column_name(column) + + execute <<-EOF.strip_heredoc + DELETE FROM #{quoted_source} + WHERE NOT EXISTS ( + SELECT true + FROM #{quoted_target} + WHERE #{quoted_target}.id = #{quoted_source}.#{quoted_column} + ) + AND #{quoted_source}.#{quoted_column} IS NOT NULL + EOF + end + + def remove_foreign_key_without_error(table, column) + remove_foreign_key(table, column: column) + rescue ArgumentError + end + + def remove_index_without_error(table, column) + remove_concurrent_index(table, column) + rescue ArgumentError + end + + def connection + # Rails memoizes connection objects, but this causes them to be shared + # amongst threads; we don't want that. + Thread.current[:foreign_key_connection] || ActiveRecord::Base.connection + end + + def queues_for_rows(rows) + queues = Array.new(CONCURRENCY) { Queue.new } + slice_size = rows.length / CONCURRENCY + + # Divide all the tuples as evenly as possible amongst the queues. + rows.each_slice(slice_size).each_with_index do |slice, index| + bucket = index % CONCURRENCY + + slice.each do |row| + queues[bucket] << row + end + end + + queues + end +end diff --git a/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb b/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb new file mode 100644 index 00000000000..46497775527 --- /dev/null +++ b/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb @@ -0,0 +1,40 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class CorrectProtectedBranchesForeignKeys < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + def up + remove_foreign_key_without_error(:protected_branch_push_access_levels, + column: :protected_branch_id) + + execute <<-EOF + DELETE FROM protected_branch_push_access_levels + WHERE NOT EXISTS ( + SELECT true + FROM protected_branches + WHERE protected_branch_push_access_levels.protected_branch_id = protected_branches.id + ) + AND protected_branch_id IS NOT NULL + EOF + + add_concurrent_foreign_key(:protected_branch_push_access_levels, + :protected_branches, + column: :protected_branch_id) + end + + def down + # Previously there was a foreign key without a CASCADING DELETE, so we'll + # just leave the foreign key in place. + end + + def remove_foreign_key_without_error(*args) + remove_foreign_key(*args) + rescue ArgumentError + end +end diff --git a/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb b/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb new file mode 100644 index 00000000000..9f524fac8a7 --- /dev/null +++ b/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb @@ -0,0 +1,30 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddForeignKeyForMergeRequestDiffs < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + def up + execute <<-EOF + DELETE FROM merge_request_diffs + WHERE NOT EXISTS ( + SELECT true + FROM merge_requests + WHERE merge_requests.id = merge_request_diffs.merge_request_id + ) + EOF + + add_concurrent_foreign_key(:merge_request_diffs, + :merge_requests, + column: :merge_request_id) + end + + def down + remove_foreign_key(:merge_request_diffs, column: :merge_request_id) + end +end diff --git a/db/schema.rb b/db/schema.rb index 40f30a10a01..f12fdf903c5 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -971,6 +971,7 @@ ActiveRecord::Schema.define(version: 20170703102400) do end add_index "pages_domains", ["domain"], name: "index_pages_domains_on_domain", unique: true, using: :btree + add_index "pages_domains", ["project_id"], name: "index_pages_domains_on_project_id", using: :btree create_table "personal_access_tokens", force: :cascade do |t| t.integer "user_id", null: false @@ -1020,6 +1021,7 @@ ActiveRecord::Schema.define(version: 20170703102400) do end add_index "project_group_links", ["group_id"], name: "index_project_group_links_on_group_id", using: :btree + add_index "project_group_links", ["project_id"], name: "index_project_group_links_on_project_id", using: :btree create_table "project_import_data", force: :cascade do |t| t.integer "project_id" @@ -1528,48 +1530,75 @@ ActiveRecord::Schema.define(version: 20170703102400) do add_index "web_hooks", ["project_id"], name: "index_web_hooks_on_project_id", using: :btree add_index "web_hooks", ["type"], name: "index_web_hooks_on_type", using: :btree - add_foreign_key "boards", "projects" + add_foreign_key "boards", "projects", name: "fk_f15266b5f9", on_delete: :cascade add_foreign_key "chat_teams", "namespaces", on_delete: :cascade add_foreign_key "ci_builds", "ci_pipelines", column: "auto_canceled_by_id", name: "fk_a2141b1522", on_delete: :nullify add_foreign_key "ci_builds", "ci_stages", column: "stage_id", name: "fk_3a9eaa254d", on_delete: :cascade + add_foreign_key "ci_builds", "projects", name: "fk_befce0568a", on_delete: :cascade add_foreign_key "ci_pipeline_schedules", "projects", name: "fk_8ead60fcc4", on_delete: :cascade add_foreign_key "ci_pipeline_schedules", "users", column: "owner_id", name: "fk_9ea99f58d2", on_delete: :nullify add_foreign_key "ci_pipelines", "ci_pipeline_schedules", column: "pipeline_schedule_id", name: "fk_3d34ab2e06", on_delete: :nullify add_foreign_key "ci_pipelines", "ci_pipelines", column: "auto_canceled_by_id", name: "fk_262d4c2d19", on_delete: :nullify + add_foreign_key "ci_pipelines", "projects", name: "fk_86635dbd80", on_delete: :cascade + add_foreign_key "ci_runner_projects", "projects", name: "fk_4478a6f1e4", on_delete: :cascade add_foreign_key "ci_stages", "ci_pipelines", column: "pipeline_id", name: "fk_fb57e6cc56", on_delete: :cascade add_foreign_key "ci_stages", "projects", name: "fk_2360681d1d", on_delete: :cascade add_foreign_key "ci_trigger_requests", "ci_triggers", column: "trigger_id", name: "fk_b8ec8b7245", on_delete: :cascade + add_foreign_key "ci_triggers", "projects", name: "fk_e3e63f966e", on_delete: :cascade add_foreign_key "ci_triggers", "users", column: "owner_id", name: "fk_e8e10d1964", on_delete: :cascade add_foreign_key "ci_variables", "projects", name: "fk_ada5eb64b3", on_delete: :cascade add_foreign_key "container_repositories", "projects" + add_foreign_key "deploy_keys_projects", "projects", name: "fk_58a901ca7e", on_delete: :cascade + add_foreign_key "deployments", "projects", name: "fk_b9a3851b82", on_delete: :cascade + add_foreign_key "environments", "projects", name: "fk_d1c8c1da6a", on_delete: :cascade + add_foreign_key "events", "projects", name: "fk_0434b48643", on_delete: :cascade + add_foreign_key "forked_project_links", "projects", column: "forked_to_project_id", name: "fk_434510edb0", on_delete: :cascade add_foreign_key "issue_assignees", "issues", name: "fk_b7d881734a", on_delete: :cascade add_foreign_key "issue_assignees", "users", name: "fk_5e0c8d9154", on_delete: :cascade add_foreign_key "issue_metrics", "issues", on_delete: :cascade + add_foreign_key "issues", "projects", name: "fk_899c8f3231", on_delete: :cascade add_foreign_key "label_priorities", "labels", on_delete: :cascade add_foreign_key "label_priorities", "projects", on_delete: :cascade add_foreign_key "labels", "namespaces", column: "group_id", on_delete: :cascade - add_foreign_key "lists", "boards" - add_foreign_key "lists", "labels" + add_foreign_key "labels", "projects", name: "fk_7de4989a69", on_delete: :cascade + add_foreign_key "lists", "boards", name: "fk_0d3f677137", on_delete: :cascade + add_foreign_key "lists", "labels", name: "fk_7a5553d60f", on_delete: :cascade add_foreign_key "merge_request_diff_files", "merge_request_diffs", on_delete: :cascade + add_foreign_key "merge_request_diffs", "merge_requests", name: "fk_8483f3258f", on_delete: :cascade add_foreign_key "merge_request_metrics", "ci_pipelines", column: "pipeline_id", on_delete: :cascade add_foreign_key "merge_request_metrics", "merge_requests", on_delete: :cascade + add_foreign_key "merge_requests", "projects", column: "target_project_id", name: "fk_a6963e8447", on_delete: :cascade add_foreign_key "merge_requests_closing_issues", "issues", on_delete: :cascade add_foreign_key "merge_requests_closing_issues", "merge_requests", on_delete: :cascade + add_foreign_key "milestones", "projects", name: "fk_9bd0a0c791", on_delete: :cascade + add_foreign_key "notes", "projects", name: "fk_99e097b079", on_delete: :cascade add_foreign_key "oauth_openid_requests", "oauth_access_grants", column: "access_grant_id", name: "fk_oauth_openid_requests_oauth_access_grants_access_grant_id" + add_foreign_key "pages_domains", "projects", name: "fk_ea2f6dfc6f", on_delete: :cascade add_foreign_key "personal_access_tokens", "users" add_foreign_key "project_authorizations", "projects", on_delete: :cascade add_foreign_key "project_authorizations", "users", on_delete: :cascade + add_foreign_key "project_features", "projects", name: "fk_18513d9b92", on_delete: :cascade + add_foreign_key "project_group_links", "projects", name: "fk_daa8cee94c", on_delete: :cascade + add_foreign_key "project_import_data", "projects", name: "fk_ffb9ee3a10", on_delete: :cascade add_foreign_key "project_statistics", "projects", on_delete: :cascade - add_foreign_key "protected_branch_merge_access_levels", "protected_branches" - add_foreign_key "protected_branch_push_access_levels", "protected_branches" + add_foreign_key "protected_branch_merge_access_levels", "protected_branches", name: "fk_8a3072ccb3", on_delete: :cascade + add_foreign_key "protected_branch_push_access_levels", "protected_branches", name: "fk_9ffc86a3d9", on_delete: :cascade + add_foreign_key "protected_branches", "projects", name: "fk_7a9c6d93e7", on_delete: :cascade add_foreign_key "protected_tag_create_access_levels", "namespaces", column: "group_id" add_foreign_key "protected_tag_create_access_levels", "protected_tags" add_foreign_key "protected_tag_create_access_levels", "users" + add_foreign_key "protected_tags", "projects", name: "fk_8e4af87648", on_delete: :cascade + add_foreign_key "releases", "projects", name: "fk_47fe2a0596", on_delete: :cascade + add_foreign_key "services", "projects", name: "fk_71cce407f9", on_delete: :cascade + add_foreign_key "snippets", "projects", name: "fk_be41fd4bb7", on_delete: :cascade add_foreign_key "subscriptions", "projects", on_delete: :cascade add_foreign_key "system_note_metadata", "notes", name: "fk_d83a918cb1", on_delete: :cascade add_foreign_key "timelogs", "issues", name: "fk_timelogs_issues_issue_id", on_delete: :cascade add_foreign_key "timelogs", "merge_requests", name: "fk_timelogs_merge_requests_merge_request_id", on_delete: :cascade + add_foreign_key "todos", "projects", name: "fk_45054f9c45", on_delete: :cascade add_foreign_key "trending_projects", "projects", on_delete: :cascade add_foreign_key "u2f_registrations", "users" + add_foreign_key "users_star_projects", "projects", name: "fk_22cd27ddfc", on_delete: :cascade add_foreign_key "web_hook_logs", "web_hooks", on_delete: :cascade + add_foreign_key "web_hooks", "projects", name: "fk_0c8ca6d9d1", on_delete: :cascade end -- cgit v1.2.1 From aff5c9f3e5ecdd9eee2b2b60ab6367da878582fc Mon Sep 17 00:00:00 2001 From: Sean McGivern Date: Fri, 16 Jun 2017 15:00:58 +0100 Subject: Add table for merge request commits This is an ID-less table with just three columns: an association to the merge request diff the commit belongs to, the relative order of the commit within the merge request diff, and the commit SHA itself. Previously we stored much more information about the commits, so that we could display them even when they were deleted from the repo. Since 8.0, we ensure that those commits are kept around for as long as the target repo itself is, so we don't need to duplicate that data in the database. --- ...170616133147_create_merge_request_diff_commits.rb | 20 ++++++++++++++++++++ db/schema.rb | 16 ++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 db/migrate/20170616133147_create_merge_request_diff_commits.rb (limited to 'db') diff --git a/db/migrate/20170616133147_create_merge_request_diff_commits.rb b/db/migrate/20170616133147_create_merge_request_diff_commits.rb new file mode 100644 index 00000000000..616464cb470 --- /dev/null +++ b/db/migrate/20170616133147_create_merge_request_diff_commits.rb @@ -0,0 +1,20 @@ +class CreateMergeRequestDiffCommits < ActiveRecord::Migration + DOWNTIME = false + + def change + create_table :merge_request_diff_commits, id: false do |t| + t.datetime_with_timezone :authored_date + t.datetime_with_timezone :committed_date + t.belongs_to :merge_request_diff, null: false, foreign_key: { on_delete: :cascade } + t.integer :relative_order, null: false + t.binary :sha, null: false, limit: 20 + t.text :author_name + t.text :author_email + t.text :committer_name + t.text :committer_email + t.text :message + + t.index [:merge_request_diff_id, :relative_order], name: 'index_merge_request_diff_commits_on_mr_diff_id_and_order', unique: true + end + end +end diff --git a/db/schema.rb b/db/schema.rb index f12fdf903c5..a47a6ae9a98 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -693,6 +693,21 @@ ActiveRecord::Schema.define(version: 20170703102400) do add_index "members", ["source_id", "source_type"], name: "index_members_on_source_id_and_source_type", using: :btree add_index "members", ["user_id"], name: "index_members_on_user_id", using: :btree + create_table "merge_request_diff_commits", id: false, force: :cascade do |t| + t.datetime "authored_date" + t.datetime "committed_date" + t.integer "merge_request_diff_id", null: false + t.integer "relative_order", null: false + t.binary "sha", null: false + t.text "author_name" + t.text "author_email" + t.text "committer_name" + t.text "committer_email" + t.text "message" + end + + add_index "merge_request_diff_commits", ["merge_request_diff_id", "relative_order"], name: "index_merge_request_diff_commits_on_mr_diff_id_and_order", unique: true, using: :btree + create_table "merge_request_diff_files", id: false, force: :cascade do |t| t.integer "merge_request_diff_id", null: false t.integer "relative_order", null: false @@ -1563,6 +1578,7 @@ ActiveRecord::Schema.define(version: 20170703102400) do add_foreign_key "labels", "projects", name: "fk_7de4989a69", on_delete: :cascade add_foreign_key "lists", "boards", name: "fk_0d3f677137", on_delete: :cascade add_foreign_key "lists", "labels", name: "fk_7a5553d60f", on_delete: :cascade + add_foreign_key "merge_request_diff_commits", "merge_request_diffs", on_delete: :cascade add_foreign_key "merge_request_diff_files", "merge_request_diffs", on_delete: :cascade add_foreign_key "merge_request_diffs", "merge_requests", name: "fk_8483f3258f", on_delete: :cascade add_foreign_key "merge_request_metrics", "ci_pipelines", column: "pipeline_id", on_delete: :cascade -- cgit v1.2.1