summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMayra Cabrera <mcabrera@gitlab.com>2019-06-17 13:45:44 -0500
committerMayra Cabrera <mcabrera@gitlab.com>2019-06-19 12:05:52 -0500
commita338b50f76bb7f492fbe3e68760566e64fd607be (patch)
treec8a0b1579bfe42fd3d4683306051ca1fc459ed03
parent6d4f33ceafbf55ae1283352d092c873221fdcbf1 (diff)
downloadgitlab-ce-62214-namespace-database-migration.tar.gz
Migrate root_id column on Namespaces table62214-namespace-database-migration
Includes: - Migration to add root_id column on Namespaces table - Background migration to schedule the population of this new field
-rw-r--r--db/migrate/20190617154954_add_root_id_to_namespace.rb11
-rw-r--r--db/migrate/20190617181054_schedule_populate_namespace_root_id.rb32
-rw-r--r--db/schema.rb3
-rw-r--r--lib/gitlab/background_migration/populate_namespace_root_id_column.rb73
-rw-r--r--spec/db/schema_spec.rb2
-rw-r--r--spec/lib/gitlab/background_migration/populate_namespace_root_id_column_spec.rb76
6 files changed, 195 insertions, 2 deletions
diff --git a/db/migrate/20190617154954_add_root_id_to_namespace.rb b/db/migrate/20190617154954_add_root_id_to_namespace.rb
new file mode 100644
index 00000000000..db9bfa7fd8b
--- /dev/null
+++ b/db/migrate/20190617154954_add_root_id_to_namespace.rb
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+class AddRootIdToNamespace < ActiveRecord::Migration[5.1]
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ def change
+ add_column :namespaces, :root_id, :integer
+ end
+end
diff --git a/db/migrate/20190617181054_schedule_populate_namespace_root_id.rb b/db/migrate/20190617181054_schedule_populate_namespace_root_id.rb
new file mode 100644
index 00000000000..09e28cbe01c
--- /dev/null
+++ b/db/migrate/20190617181054_schedule_populate_namespace_root_id.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+class SchedulePopulateNamespaceRootId < ActiveRecord::Migration[5.1]
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+ BATCH_SIZE = 10_000
+ MIGRATION = 'PopulateNamespaceRootIdColumn'
+ DELAY_INTERVAL = 10.minutes.to_i
+
+ disable_ddl_transaction!
+
+ class Namespace < ActiveRecord::Base
+ self.table_name = 'namespaces'
+
+ include EachBatch
+ end
+
+ def up
+ say 'Scheduling `PopulateNamespaceRootIdColumn` jobs'
+
+ # We currently have ~4_600_000 namespace records on GitLab.com.
+ # This means the migration will schedule ~460 jobs (10k namespaces each), spaced 10 minutes apart,
+ # so this should take ~77 hours to complete (i.e. ~60k namespaces per hour).
+ queue_background_migration_jobs_by_range_at_intervals(
+ Namespace,
+ MIGRATION,
+ DELAY_INTERVAL,
+ batch_size: BATCH_SIZE
+ )
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 7a7319c132e..b534877e30c 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 20190613030606) do
+ActiveRecord::Schema.define(version: 20190617181054) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@@ -2098,6 +2098,7 @@ ActiveRecord::Schema.define(version: 20190613030606) do
t.integer "extra_shared_runners_minutes_limit"
t.string "ldap_sync_status", default: "ready", null: false
t.boolean "membership_lock", default: false
+ t.integer "root_id"
t.index ["created_at"], name: "index_namespaces_on_created_at", using: :btree
t.index ["custom_project_templates_group_id", "type"], name: "index_namespaces_on_custom_project_templates_group_id_and_type", where: "(custom_project_templates_group_id IS NOT NULL)", using: :btree
t.index ["file_template_project_id"], name: "index_namespaces_on_file_template_project_id", using: :btree
diff --git a/lib/gitlab/background_migration/populate_namespace_root_id_column.rb b/lib/gitlab/background_migration/populate_namespace_root_id_column.rb
new file mode 100644
index 00000000000..33b9d59cba2
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_namespace_root_id_column.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module BackgroundMigration
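+ # This background migration sets namespaces.root_id for every root namespace
+ # (parent_id IS NULL) whose ID is within the given range, and for every record
+ # returned by its `self_and_descendants` (those IDs are not limited to the range).
+ # A single UPDATE statement is issued for the given range.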
+ class PopulateNamespaceRootIdColumn
+ def perform(from_id, to_id)
+ root_namespaces = root_namespaces_between(from_id: from_id, to_id: to_id)
+ return if root_namespaces.empty?
+
+ namespaces_information = associate_children_with_root_namespaces(root_namespaces)
+ sql_query = build_update_namespaces_sql(namespaces_information)
+
+ execute(sql_query)
+ end
+
+ private
+
+ def root_namespaces_between(from_id:, to_id:)
+ Namespace
+ .where('parent_id IS NULL')
+ .where(id: from_id..to_id)
+ end
+
+ def associate_children_with_root_namespaces(root_namespaces)
+ {}.tap do |namespaces_information|
+ root_namespaces.each do |root_namespace|
+ root_namespace.self_and_descendants.each do |namespace|
+ namespaces_information[namespace.id] = root_namespace.id
+ end
+ end
+ end
+ end
+
+ def build_update_namespaces_sql(namespaces_information)
+ case_statements = build_case_statements(namespaces_information)
+ namespace_ids = namespaces_information.keys
+
+ update_sql_query(case_statements: case_statements, namespace_ids: namespace_ids)
+ end
+
+ def build_case_statements(namespaces)
+ [].tap do |namespaces_information|
+ namespaces.each do |child_namespace_id, root_namespace_id|
+ statement = "WHEN #{child_namespace_id} THEN #{root_namespace_id}"
+ namespaces_information << statement
+ end
+ end
+ end
+
+ def update_sql_query(case_statements:, namespace_ids:)
+ <<~SQL
+ UPDATE namespaces
+ SET root_id = CASE id
+ #{case_statements.join("\n")}
+ END
+ WHERE id IN (#{namespace_ids.join(",")})
+ SQL
+ end
+
+ def execute(sql)
+ connection.execute(sql)
+ end
+
+ def connection
+ @connection ||= ActiveRecord::Base.connection
+ end
+ end
+ end
+end
diff --git a/spec/db/schema_spec.rb b/spec/db/schema_spec.rb
index 6cfec5f4017..55b42dbe35f 100644
--- a/spec/db/schema_spec.rb
+++ b/spec/db/schema_spec.rb
@@ -46,7 +46,7 @@ describe 'Database schema' do
ldap_group_links: %w[group_id],
members: %w[source_id created_by_id],
merge_requests: %w[last_edited_by_id state_id],
- namespaces: %w[owner_id parent_id],
+ namespaces: %w[owner_id parent_id root_id],
notes: %w[author_id commit_id noteable_id updated_by_id resolved_by_id discussion_id],
notification_settings: %w[source_id],
oauth_access_grants: %w[resource_owner_id application_id],
diff --git a/spec/lib/gitlab/background_migration/populate_namespace_root_id_column_spec.rb b/spec/lib/gitlab/background_migration/populate_namespace_root_id_column_spec.rb
new file mode 100644
index 00000000000..57e926b7c55
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/populate_namespace_root_id_column_spec.rb
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+
+describe Gitlab::BackgroundMigration::PopulateNamespaceRootIdColumn, :migration, schema: 20190617181054 do
+ let(:namespaces_table) { table(:namespaces) }
+
+ def create_namespace_for(parent:, iid:)
+ namespaces_table.create!(
+ name: "#{parent.name}-group_#{iid}",
+ path: "#{parent.name}-group_#{iid}",
+ parent_id: parent.id
+ )
+ end
+
+ describe '#perform' do
+ let(:root_namespace_a) { namespaces_table.create!(name: 'root_a', path: 'root-a') }
+ let(:root_namespace_b) { namespaces_table.create!(name: 'root_b', path: 'root-b') }
+
+ before do
+ (1..10).each do |subgroup_id|
+ create_namespace_for(parent: root_namespace_a, iid: subgroup_id)
+ create_namespace_for(parent: root_namespace_b, iid: subgroup_id)
+ end
+ end
+
+ it 'updates the root id of root namespaces' do
+ subject.perform(root_namespace_a.id, root_namespace_b.id)
+
+ expect(root_namespace_a.reload.root_id).to eq(root_namespace_a.id)
+ expect(root_namespace_b.reload.root_id).to eq(root_namespace_b.id)
+ end
+
+ it 'updates the root id of all namespaces' do
+ subject.perform(root_namespace_a.id, root_namespace_b.id)
+
+ namespace_a_children = namespaces_table.where(parent_id: root_namespace_a.id)
+ namespace_b_children = namespaces_table.where(parent_id: root_namespace_b.id)
+
+ namespace_a_children.each do |group|
+ expect(group.root_id).to eq(root_namespace_a.id)
+ end
+
+ namespace_b_children.each do |group|
+ expect(group.root_id).to eq(root_namespace_b.id)
+ end
+ end
+
+ context 'when a subgroup has children' do
+ let(:subgroup) do
+ create_namespace_for(
+ parent: root_namespace_a,
+ iid: 50
+ )
+ end
+
+ before do
+ (1..10).each do |subgroup_id|
+ create_namespace_for(parent: subgroup, iid: subgroup_id)
+ end
+ end
+
+ it 'updates inner groups' do
+ subject.perform(root_namespace_a.id, root_namespace_b.id)
+
+ subgroup_children = namespaces_table.where(parent_id: subgroup.id)
+
+ expect(subgroup.reload.root_id).to eq(root_namespace_a.id)
+
+ subgroup_children.each do |group|
+ expect(group.root_id).to eq(root_namespace_a.id)
+ end
+ end
+ end
+ end
+end