diff options
author | Mayra Cabrera <mcabrera@gitlab.com> | 2019-06-17 13:45:44 -0500 |
---|---|---|
committer | Mayra Cabrera <mcabrera@gitlab.com> | 2019-06-19 12:05:52 -0500 |
commit | a338b50f76bb7f492fbe3e68760566e64fd607be (patch) | |
tree | c8a0b1579bfe42fd3d4683306051ca1fc459ed03 | |
parent | 6d4f33ceafbf55ae1283352d092c873221fdcbf1 (diff) | |
download | gitlab-ce-62214-namespace-database-migration.tar.gz |
Migrate root_id column on Namespaces table (62214-namespace-database-migration)
Includes:
- Migration to add root_id column on Namespaces table
- Background migration to schedule the population of this new field
-rw-r--r-- | db/migrate/20190617154954_add_root_id_to_namespace.rb | 11 | ||||
-rw-r--r-- | db/migrate/20190617181054_schedule_populate_namespace_root_id.rb | 32 | ||||
-rw-r--r-- | db/schema.rb | 3 | ||||
-rw-r--r-- | lib/gitlab/background_migration/populate_namespace_root_id_column.rb | 73 | ||||
-rw-r--r-- | spec/db/schema_spec.rb | 2 | ||||
-rw-r--r-- | spec/lib/gitlab/background_migration/populate_namespace_root_id_column_spec.rb | 76 |
6 files changed, 195 insertions, 2 deletions
diff --git a/db/migrate/20190617154954_add_root_id_to_namespace.rb b/db/migrate/20190617154954_add_root_id_to_namespace.rb
new file mode 100644
index 00000000000..db9bfa7fd8b
--- /dev/null
+++ b/db/migrate/20190617154954_add_root_id_to_namespace.rb
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+class AddRootIdToNamespace < ActiveRecord::Migration[5.1]
+  include Gitlab::Database::MigrationHelpers
+
+  DOWNTIME = false
+
+  def change
+    add_column :namespaces, :root_id, :integer
+  end
+end
diff --git a/db/migrate/20190617181054_schedule_populate_namespace_root_id.rb b/db/migrate/20190617181054_schedule_populate_namespace_root_id.rb
new file mode 100644
index 00000000000..09e28cbe01c
--- /dev/null
+++ b/db/migrate/20190617181054_schedule_populate_namespace_root_id.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+class SchedulePopulateNamespaceRootId < ActiveRecord::Migration[5.1]
+  include Gitlab::Database::MigrationHelpers
+
+  DOWNTIME = false
+  BATCH_SIZE = 10_000
+  MIGRATION = 'PopulateNamespaceRootIdColumn'
+  DELAY_INTERVAL = 10.minutes.to_i
+
+  disable_ddl_transaction!
+
+  class Namespace < ActiveRecord::Base
+    self.table_name = 'namespaces'
+
+    include EachBatch
+  end
+
+  def up
+    say 'Scheduling `PopulateNamespaceRootIdColumn` jobs'
+
+    # We currently have ~4_600_000 namespace records on GitLab.com
+    # This means, the migration will schedule ~460 jobs (10k each) within a 10 minutes gap.
+    # so this should take ~153 hours to complete (assuming 30k namespaces per hour)
+    queue_background_migration_jobs_by_range_at_intervals(
+      Namespace,
+      MIGRATION,
+      DELAY_INTERVAL,
+      batch_size: BATCH_SIZE
+    )
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 7a7319c132e..b534877e30c 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 20190613030606) do
+ActiveRecord::Schema.define(version: 20190617181054) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"
 
@@ -2098,6 +2098,7 @@ ActiveRecord::Schema.define(version: 20190613030606) do
     t.integer "extra_shared_runners_minutes_limit"
     t.string "ldap_sync_status", default: "ready", null: false
     t.boolean "membership_lock", default: false
+    t.integer "root_id"
     t.index ["created_at"], name: "index_namespaces_on_created_at", using: :btree
     t.index ["custom_project_templates_group_id", "type"], name: "index_namespaces_on_custom_project_templates_group_id_and_type", where: "(custom_project_templates_group_id IS NOT NULL)", using: :btree
     t.index ["file_template_project_id"], name: "index_namespaces_on_file_template_project_id", using: :btree
diff --git a/lib/gitlab/background_migration/populate_namespace_root_id_column.rb b/lib/gitlab/background_migration/populate_namespace_root_id_column.rb
new file mode 100644
index 00000000000..33b9d59cba2
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_namespace_root_id_column.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # This background migration updates records on namespaces table
+    # according to the given namespace IDs range.
+    #
+    # A single update is issued for the given range.
+    class PopulateNamespaceRootIdColumn
+      def perform(from_id, to_id)
+        root_namespaces = root_namespaces_between(from_id: from_id, to_id: to_id)
+        return if root_namespaces.empty?
+
+        namespaces_information = associate_children_with_root_namespaces(root_namespaces)
+        sql_query = build_update_namespaces_sql(namespaces_information)
+
+        execute(sql_query)
+      end
+
+      private
+
+      def root_namespaces_between(from_id:, to_id:)
+        Namespace
+          .where('parent_id IS NULL')
+          .where(id: from_id..to_id)
+      end
+
+      def associate_children_with_root_namespaces(root_namespaces)
+        {}.tap do |namespaces_information|
+          root_namespaces.each do |root_namespace|
+            root_namespace.self_and_descendants.each do |namespace|
+              namespaces_information[namespace.id] = root_namespace.id
+            end
+          end
+        end
+      end
+
+      def build_update_namespaces_sql(namespaces_information)
+        case_statements = build_case_statements(namespaces_information)
+        namespace_ids = namespaces_information.keys
+
+        update_sql_query(case_statements: case_statements, namespace_ids: namespace_ids)
+      end
+
+      def build_case_statements(namespaces)
+        [].tap do |namespaces_information|
+          namespaces.each do |child_namespace_id, root_namespace_id|
+            statement = "WHEN #{child_namespace_id} THEN #{root_namespace_id}"
+            namespaces_information << statement
+          end
+        end
+      end
+
+      def update_sql_query(case_statements:, namespace_ids:)
+        <<~SQL
+          UPDATE namespaces
+          SET root_id = CASE id
+          #{case_statements.join("\n")}
+          END
+          WHERE id IN (#{namespace_ids.join(",")})
+        SQL
+      end
+
+      def execute(sql)
+        connection.execute(sql)
+      end
+
+      def connection
+        @connection ||= ActiveRecord::Base.connection
+      end
+    end
+  end
+end
diff --git a/spec/db/schema_spec.rb b/spec/db/schema_spec.rb
index 6cfec5f4017..55b42dbe35f 100644
--- a/spec/db/schema_spec.rb
+++ b/spec/db/schema_spec.rb
@@ -46,7 +46,7 @@ describe 'Database schema' do
     ldap_group_links: %w[group_id],
     members: %w[source_id created_by_id],
     merge_requests: %w[last_edited_by_id state_id],
-    namespaces: %w[owner_id parent_id],
+    namespaces: %w[owner_id parent_id root_id],
     notes: %w[author_id commit_id noteable_id updated_by_id resolved_by_id discussion_id],
     notification_settings: %w[source_id],
     oauth_access_grants: %w[resource_owner_id application_id],
diff --git a/spec/lib/gitlab/background_migration/populate_namespace_root_id_column_spec.rb b/spec/lib/gitlab/background_migration/populate_namespace_root_id_column_spec.rb
new file mode 100644
index 00000000000..57e926b7c55
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/populate_namespace_root_id_column_spec.rb
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+
+describe Gitlab::BackgroundMigration::PopulateNamespaceRootIdColumn, :migration, schema: 20190617181054 do
+  let(:namespaces_table) { table(:namespaces) }
+
+  def create_namespace_for(parent:, iid:)
+    namespaces_table.create!(
+      name: "#{parent.name}-group_#{iid}",
+      path: "#{parent.name}-group_#{iid}",
+      parent_id: parent.id
+    )
+  end
+
+  describe '#perform' do
+    let(:root_namespace_a) { namespaces_table.create!(name: 'root_a', path: 'root-a') }
+    let(:root_namespace_b) { namespaces_table.create!(name: 'root_b', path: 'root-b') }
+
+    before do
+      (1..10).each do |subgroup_id|
+        create_namespace_for(parent: root_namespace_a, iid: subgroup_id)
+        create_namespace_for(parent: root_namespace_b, iid: subgroup_id)
+      end
+    end
+
+    it 'updates the root id of root namespaces' do
+      subject.perform(root_namespace_a.id, root_namespace_b.id)
+
+      expect(root_namespace_a.reload.root_id).to eq(root_namespace_a.id)
+      expect(root_namespace_b.reload.root_id).to eq(root_namespace_b.id)
+    end
+
+    it 'updates the root id of all namespaces' do
+      subject.perform(root_namespace_a.id, root_namespace_b.id)
+
+      namespace_a_children = namespaces_table.where(parent_id: root_namespace_a.id)
+      namespace_b_children = namespaces_table.where(parent_id: root_namespace_b.id)
+
+      namespace_a_children.each do |group|
+        expect(group.root_id).to eq(root_namespace_a.id)
+      end
+
+      namespace_b_children.each do |group|
+        expect(group.root_id).to eq(root_namespace_b.id)
+      end
+    end
+
+    context 'when a subgroup has children' do
+      let(:subgroup) do
+        create_namespace_for(
+          parent: root_namespace_a,
+          iid: 50
+        )
+      end
+
+      before do
+        (1..10).each do |subgroup_id|
+          create_namespace_for(parent: subgroup, iid: subgroup_id)
+        end
+      end
+
+      it 'updates inner groups' do
+        subject.perform(root_namespace_a.id, root_namespace_b.id)
+
+        subgroup_children = namespaces_table.where(parent_id: subgroup.id)
+
+        expect(subgroup.reload.root_id).to eq(root_namespace_a.id)
+
+        subgroup_children.each do |group|
+          expect(group.root_id).to eq(root_namespace_a.id)
+        end
+      end
+    end
+  end
+end