From f579a08751b8f5f9be479812355cce623d29eb11 Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Fri, 23 Feb 2018 17:43:37 +0100 Subject: Add post-migration to populate user_contributed_projects table. Trying to be a good citizen here, we spread the load building that table. Maybe not required, but at least good practice and no harm. --- ...124427_build_user_contributed_projects_table.rb | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 db/post_migrate/20180223124427_build_user_contributed_projects_table.rb (limited to 'db/post_migrate') diff --git a/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb b/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb new file mode 100644 index 00000000000..4f4e3617e12 --- /dev/null +++ b/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb @@ -0,0 +1,48 @@ +class BuildUserContributedProjectsTable < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + BATCH_SIZE = 100000 + SLEEP_TIME = 30 + + def up + with_index(:events, [:author_id, :project_id], name: 'events_user_contributions_temp', where: 'project_id IS NOT NULL') do + iteration = 0 + records = 0 + begin + Rails.logger.info "Building user_contributed_projects table, batch ##{iteration}" + result = execute <<~SQL + INSERT INTO user_contributed_projects (user_id, project_id) + SELECT e.user_id, e.project_id + FROM (SELECT DISTINCT author_id AS user_id, project_id FROM events WHERE project_id IS NOT NULL) AS e + LEFT JOIN user_contributed_projects ucp USING (user_id, project_id) + WHERE ucp.user_id IS NULL + LIMIT #{BATCH_SIZE} + SQL + iteration += 1 + records += result.cmd_tuples + Rails.logger.info "Building user_contributed_projects table, batch ##{iteration} complete, created #{records} overall" + Kernel.sleep(SLEEP_TIME) if result.cmd_tuples > 0 + end while result.cmd_tuples > 0 + end + + execute "ANALYZE user_contributed_projects" if Gitlab::Database.postgresql? + end + + def down + execute "TRUNCATE user_contributed_projects" + end + + private + + def with_index(*args) + add_concurrent_index(*args) unless index_exists?(*args) + yield + ensure + remove_concurrent_index(*args) if index_exists?(*args) + end +end -- cgit v1.2.1 From f88d5132c2c94407838e589e450f1b4167fd2629 Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Fri, 23 Feb 2018 19:50:32 +0100 Subject: Simpler migration strategy for MySQL. MySQL does not return the number of inserted rows (or at least not expose them through cmd_tuples). Let's do one-shot INSERT instead. --- ...124427_build_user_contributed_projects_table.rb | 87 ++++++++++++++-------- 1 file changed, 58 insertions(+), 29 deletions(-) (limited to 'db/post_migrate') diff --git a/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb b/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb index 4f4e3617e12..40049d4ab65 100644 --- a/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb +++ b/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb @@ -6,31 +6,12 @@ class BuildUserContributedProjectsTable < ActiveRecord::Migration disable_ddl_transaction! - BATCH_SIZE = 100000 - SLEEP_TIME = 30 - def up - with_index(:events, [:author_id, :project_id], name: 'events_user_contributions_temp', where: 'project_id IS NOT NULL') do - iteration = 0 - records = 0 - begin - Rails.logger.info "Building user_contributed_projects table, batch ##{iteration}" - result = execute <<~SQL - INSERT INTO user_contributed_projects (user_id, project_id) - SELECT e.user_id, e.project_id - FROM (SELECT DISTINCT author_id AS user_id, project_id FROM events WHERE project_id IS NOT NULL) AS e - LEFT JOIN user_contributed_projects ucp USING (user_id, project_id) - WHERE ucp.user_id IS NULL - LIMIT #{BATCH_SIZE} - SQL - iteration += 1 - records += result.cmd_tuples - Rails.logger.info "Building user_contributed_projects table, batch ##{iteration} complete, created #{records} overall" - Kernel.sleep(SLEEP_TIME) if result.cmd_tuples > 0 - end while result.cmd_tuples > 0 - end - - execute "ANALYZE user_contributed_projects" if Gitlab::Database.postgresql? + if Gitlab::Database.postgresql? + PostgresStrategy.new + else + MysqlStrategy.new + end.up end def down @@ -39,10 +20,58 @@ class BuildUserContributedProjectsTable < ActiveRecord::Migration private - def with_index(*args) - add_concurrent_index(*args) unless index_exists?(*args) - yield - ensure - remove_concurrent_index(*args) if index_exists?(*args) + class PostgresStrategy < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + BATCH_SIZE = 100_000 + SLEEP_TIME = 30 + + def up + with_index(:events, [:author_id, :project_id], name: 'events_user_contributions_temp', where: 'project_id IS NOT NULL') do + iteration = 0 + records = 0 + begin + Rails.logger.info "Building user_contributed_projects table, batch ##{iteration}" + result = execute <<~SQL + INSERT INTO user_contributed_projects (user_id, project_id) + SELECT e.user_id, e.project_id + FROM (SELECT DISTINCT author_id AS user_id, project_id FROM events WHERE project_id IS NOT NULL) AS e + LEFT JOIN user_contributed_projects ucp USING (user_id, project_id) + WHERE ucp.user_id IS NULL + LIMIT #{BATCH_SIZE} + SQL + iteration += 1 + records += result.cmd_tuples + Rails.logger.info "Building user_contributed_projects table, batch ##{iteration} complete, created #{records} overall" + Kernel.sleep(SLEEP_TIME) if result.cmd_tuples > 0 + end while result.cmd_tuples > 0 + end + + execute "ANALYZE user_contributed_projects" + end + + private + + def with_index(*args) + add_concurrent_index(*args) unless index_exists?(*args) + yield + ensure + remove_concurrent_index(*args) if index_exists?(*args) + end end + + class MysqlStrategy < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + def up + execute <<~SQL + INSERT INTO user_contributed_projects (user_id, project_id) + SELECT e.user_id, e.project_id + FROM (SELECT DISTINCT author_id AS user_id, project_id FROM events WHERE project_id IS NOT NULL) AS e + LEFT JOIN user_contributed_projects ucp USING (user_id, project_id) + WHERE ucp.user_id IS NULL + SQL + end + end + end -- cgit v1.2.1 From 986f470d40a3ed993bb6b2c2115a05c24eb575fa Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Wed, 28 Feb 2018 16:19:43 +0100 Subject: Improve migration robustness and speed. --- .../20180223124427_build_user_contributed_projects_table.rb | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'db/post_migrate') diff --git a/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb b/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb index 40049d4ab65..ede130a3fbf 100644 --- a/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb +++ b/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb @@ -12,10 +12,14 @@ class BuildUserContributedProjectsTable < ActiveRecord::Migration else MysqlStrategy.new end.up + + add_concurrent_index :user_contributed_projects, [:project_id, :user_id], unique: true end def down execute "TRUNCATE user_contributed_projects" + + remove_concurrent_index_by_name :user_contributed_projects, 'index_user_contributed_projects_on_project_id_and_user_id' end private @@ -24,7 +28,7 @@ class BuildUserContributedProjectsTable < ActiveRecord::Migration include Gitlab::Database::MigrationHelpers BATCH_SIZE = 100_000 - SLEEP_TIME = 30 + SLEEP_TIME = 5 def up with_index(:events, [:author_id, :project_id], name: 'events_user_contributions_temp', where: 'project_id IS NOT NULL') do @@ -44,10 +48,14 @@ class BuildUserContributedProjectsTable < ActiveRecord::Migration records += result.cmd_tuples Rails.logger.info "Building user_contributed_projects table, batch ##{iteration} complete, created #{records} overall" Kernel.sleep(SLEEP_TIME) if result.cmd_tuples > 0 + rescue ActiveRecord::InvalidForeignKey => e + Rails.logger.info "Retry on InvalidForeignKey: #{e}" + retry end while result.cmd_tuples > 0 end execute "ANALYZE user_contributed_projects" + end private -- cgit v1.2.1 From 43b74afd8656df8228c19d5e5e6aee9f87abc244 Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Thu, 1 Mar 2018 15:42:04 +0100 Subject: Rename to UserInteractedProjects. This is to avoid a mix-up with the existing concept of 'user contributions'. See `User#contributed_projects` or `Event#contributions`. --- ...124427_build_user_contributed_projects_table.rb | 85 ---------------------- ...3124427_build_user_interacted_projects_table.rb | 85 ++++++++++++++++++++++ 2 files changed, 85 insertions(+), 85 deletions(-) delete mode 100644 db/post_migrate/20180223124427_build_user_contributed_projects_table.rb create mode 100644 db/post_migrate/20180223124427_build_user_interacted_projects_table.rb (limited to 'db/post_migrate') diff --git a/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb b/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb deleted file mode 100644 index ede130a3fbf..00000000000 --- a/db/post_migrate/20180223124427_build_user_contributed_projects_table.rb +++ /dev/null @@ -1,85 +0,0 @@ -class BuildUserContributedProjectsTable < ActiveRecord::Migration - include Gitlab::Database::MigrationHelpers - - # Set this constant to true if this migration requires downtime. - DOWNTIME = false - - disable_ddl_transaction! - - def up - if Gitlab::Database.postgresql? - PostgresStrategy.new - else - MysqlStrategy.new - end.up - - add_concurrent_index :user_contributed_projects, [:project_id, :user_id], unique: true - end - - def down - execute "TRUNCATE user_contributed_projects" - - remove_concurrent_index_by_name :user_contributed_projects, 'index_user_contributed_projects_on_project_id_and_user_id' - end - - private - - class PostgresStrategy < ActiveRecord::Migration - include Gitlab::Database::MigrationHelpers - - BATCH_SIZE = 100_000 - SLEEP_TIME = 5 - - def up - with_index(:events, [:author_id, :project_id], name: 'events_user_contributions_temp', where: 'project_id IS NOT NULL') do - iteration = 0 - records = 0 - begin - Rails.logger.info "Building user_contributed_projects table, batch ##{iteration}" - result = execute <<~SQL - INSERT INTO user_contributed_projects (user_id, project_id) - SELECT e.user_id, e.project_id - FROM (SELECT DISTINCT author_id AS user_id, project_id FROM events WHERE project_id IS NOT NULL) AS e - LEFT JOIN user_contributed_projects ucp USING (user_id, project_id) - WHERE ucp.user_id IS NULL - LIMIT #{BATCH_SIZE} - SQL - iteration += 1 - records += result.cmd_tuples - Rails.logger.info "Building user_contributed_projects table, batch ##{iteration} complete, created #{records} overall" - Kernel.sleep(SLEEP_TIME) if result.cmd_tuples > 0 - rescue ActiveRecord::InvalidForeignKey => e - Rails.logger.info "Retry on InvalidForeignKey: #{e}" - retry - end while result.cmd_tuples > 0 - end - - execute "ANALYZE user_contributed_projects" - - end - - private - - def with_index(*args) - add_concurrent_index(*args) unless index_exists?(*args) - yield - ensure - remove_concurrent_index(*args) if index_exists?(*args) - end - end - - class MysqlStrategy < ActiveRecord::Migration - include Gitlab::Database::MigrationHelpers - - def up - execute <<~SQL - INSERT INTO user_contributed_projects (user_id, project_id) - SELECT e.user_id, e.project_id - FROM (SELECT DISTINCT author_id AS user_id, project_id FROM events WHERE project_id IS NOT NULL) AS e - LEFT JOIN user_contributed_projects ucp USING (user_id, project_id) - WHERE ucp.user_id IS NULL - SQL - end - end - -end diff --git a/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb b/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb new file mode 100644 index 00000000000..0992509c629 --- /dev/null +++ b/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb @@ -0,0 +1,85 @@ +class BuildUserInteractedProjectsTable < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + def up + if Gitlab::Database.postgresql? + PostgresStrategy.new + else + MysqlStrategy.new + end.up + + add_concurrent_index :user_interacted_projects, [:project_id, :user_id], unique: true + end + + def down + execute "TRUNCATE user_interacted_projects" + + remove_concurrent_index_by_name :user_interacted_projects, 'index_user_interacted_projects_on_project_id_and_user_id' + end + + private + + class PostgresStrategy < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + BATCH_SIZE = 100_000 + SLEEP_TIME = 5 + + def up + with_index(:events, [:author_id, :project_id], name: 'events_user_interactions_temp', where: 'project_id IS NOT NULL') do + iteration = 0 + records = 0 + begin + Rails.logger.info "Building user_interacted_projects table, batch ##{iteration}" + result = execute <<~SQL + INSERT INTO user_interacted_projects (user_id, project_id) + SELECT e.user_id, e.project_id + FROM (SELECT DISTINCT author_id AS user_id, project_id FROM events WHERE project_id IS NOT NULL) AS e + LEFT JOIN user_interacted_projects ucp USING (user_id, project_id) + WHERE ucp.user_id IS NULL + LIMIT #{BATCH_SIZE} + SQL + iteration += 1 + records += result.cmd_tuples + Rails.logger.info "Building user_interacted_projects table, batch ##{iteration} complete, created #{records} overall" + Kernel.sleep(SLEEP_TIME) if result.cmd_tuples > 0 + rescue ActiveRecord::InvalidForeignKey => e + Rails.logger.info "Retry on InvalidForeignKey: #{e}" + retry + end while result.cmd_tuples > 0 + end + + execute "ANALYZE user_interacted_projects" + + end + + private + + def with_index(*args) + add_concurrent_index(*args) unless index_exists?(*args) + yield + ensure + remove_concurrent_index(*args) if index_exists?(*args) + end + end + + class MysqlStrategy < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + def up + execute <<~SQL + INSERT INTO user_interacted_projects (user_id, project_id) + SELECT e.user_id, e.project_id + FROM (SELECT DISTINCT author_id AS user_id, project_id FROM events WHERE project_id IS NOT NULL) AS e + LEFT JOIN user_interacted_projects ucp USING (user_id, project_id) + WHERE ucp.user_id IS NULL + SQL + end + end + +end -- cgit v1.2.1 From 8a7cd25d08789ee70eaa7de3367f7ad286c49272 Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Thu, 1 Mar 2018 17:25:05 +0100 Subject: For MySQL, foreign keys are dependent on index. --- .../20180223124427_build_user_interacted_projects_table.rb | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'db/post_migrate') diff --git a/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb b/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb index 0992509c629..4d14dc7b4fb 100644 --- a/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb +++ b/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb @@ -14,11 +14,17 @@ class BuildUserInteractedProjectsTable < ActiveRecord::Migration end.up add_concurrent_index :user_interacted_projects, [:project_id, :user_id], unique: true + + add_concurrent_foreign_key :user_interacted_projects, :users, column: :user_id, on_delete: :cascade + add_concurrent_foreign_key :user_interacted_projects, :projects, column: :project_id, on_delete: :cascade end def down execute "TRUNCATE user_interacted_projects" + remove_foreign_key :user_interacted_projects, :users + remove_foreign_key :user_interacted_projects, :projects + remove_concurrent_index_by_name :user_interacted_projects, 'index_user_interacted_projects_on_project_id_and_user_id' end -- cgit v1.2.1 From 8e668076a975447a662bf8927388ce55f7d22dba Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Mon, 5 Mar 2018 17:47:27 +0100 Subject: Improve robustness of migration. --- ...3124427_build_user_interacted_projects_table.rb | 45 +++++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) (limited to 'db/post_migrate') diff --git a/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb b/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb index 4d14dc7b4fb..5e729b1aa53 100644 --- a/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb +++ b/db/post_migrate/20180223124427_build_user_interacted_projects_table.rb @@ -13,23 +13,56 @@ class BuildUserInteractedProjectsTable < ActiveRecord::Migration MysqlStrategy.new end.up - add_concurrent_index :user_interacted_projects, [:project_id, :user_id], unique: true + unless index_exists?(:user_interacted_projects, [:project_id, :user_id]) + add_concurrent_index :user_interacted_projects, [:project_id, :user_id], unique: true + end + + unless foreign_key_exists?(:user_interacted_projects, :user_id) + add_concurrent_foreign_key :user_interacted_projects, :users, column: :user_id, on_delete: :cascade + end - add_concurrent_foreign_key :user_interacted_projects, :users, column: :user_id, on_delete: :cascade - add_concurrent_foreign_key :user_interacted_projects, :projects, column: :project_id, on_delete: :cascade + unless foreign_key_exists?(:user_interacted_projects, :project_id) + add_concurrent_foreign_key :user_interacted_projects, :projects, column: :project_id, on_delete: :cascade + end end def down execute "TRUNCATE user_interacted_projects" - remove_foreign_key :user_interacted_projects, :users - remove_foreign_key :user_interacted_projects, :projects + if foreign_key_exists?(:user_interacted_projects, :user_id) + remove_foreign_key :user_interacted_projects, :users + end + + if foreign_key_exists?(:user_interacted_projects, :project_id) + remove_foreign_key :user_interacted_projects, :projects + end - remove_concurrent_index_by_name :user_interacted_projects, 'index_user_interacted_projects_on_project_id_and_user_id' + if index_exists_by_name?(:user_interacted_projects, 'index_user_interacted_projects_on_project_id_and_user_id') + remove_concurrent_index_by_name :user_interacted_projects, 'index_user_interacted_projects_on_project_id_and_user_id' + end end private + # Rails' index_exists? doesn't work when you only give it a table and index + # name. As such we have to use some extra code to check if an index exists for + # a given name. + def index_exists_by_name?(table, index) + indexes_for_table[table].include?(index) + end + + def indexes_for_table + @indexes_for_table ||= Hash.new do |hash, table_name| + hash[table_name] = indexes(table_name).map(&:name) + end + end + + def foreign_key_exists?(table, column) + foreign_keys(table).any? do |key| + key.options[:column] == column.to_s + end + end + class PostgresStrategy < ActiveRecord::Migration include Gitlab::Database::MigrationHelpers -- cgit v1.2.1