summaryrefslogtreecommitdiff
path: root/db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb
diff options
context:
space:
mode:
Diffstat (limited to 'db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb')
-rw-r--r--db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb111
1 files changed, 111 insertions, 0 deletions
diff --git a/db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb b/db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb
new file mode 100644
index 00000000000..6fdc30d09c6
--- /dev/null
+++ b/db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb
@@ -0,0 +1,111 @@
+# frozen_string_literal: true
+
+class RemoveDuplicateProjectAuthorizations < ActiveRecord::Migration[6.1]
+ include Gitlab::Database::MigrationHelpers
+
+ BATCH_SIZE = 10_000
+ OLD_INDEX_NAME = 'index_project_authorizations_on_project_id_user_id'
+ INDEX_NAME = 'index_unique_project_authorizations_on_project_id_user_id'
+
+ class ProjectAuthorization < ActiveRecord::Base
+ self.table_name = 'project_authorizations'
+ end
+
+ disable_ddl_transaction!
+
+ def up
+ batch do |first_record, last_record|
+ break if first_record.blank?
+
+ # construct a range query where we filter records between the first and last records
+ rows = ActiveRecord::Base.connection.execute <<~SQL
+ SELECT user_id, project_id
+ FROM project_authorizations
+ WHERE
+ #{start_condition(first_record)}
+ #{end_condition(last_record)}
+ GROUP BY user_id, project_id
+ HAVING COUNT(*) > 1
+ SQL
+
+ rows.each do |row|
+ deduplicate_item(row['project_id'], row['user_id'])
+ end
+ end
+
+ add_concurrent_index :project_authorizations, [:project_id, :user_id], unique: true, name: INDEX_NAME
+ remove_concurrent_index_by_name :project_authorizations, OLD_INDEX_NAME
+ end
+
+ def down
+ add_concurrent_index(:project_authorizations, [:project_id, :user_id], name: OLD_INDEX_NAME)
+ remove_concurrent_index_by_name(:project_authorizations, INDEX_NAME)
+ end
+
+ private
+
+ def start_condition(record)
+ "(user_id, project_id) >= (#{Integer(record.user_id)}, #{Integer(record.project_id)})"
+ end
+
+ def end_condition(record)
+ return "" unless record
+
+ "AND (user_id, project_id) <= (#{Integer(record.user_id)}, #{Integer(record.project_id)})"
+ end
+
+ def batch(&block)
+ order = Gitlab::Pagination::Keyset::Order.build([
+ Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
+ attribute_name: 'user_id',
+ order_expression: ProjectAuthorization.arel_table[:user_id].asc,
+ nullable: :not_nullable,
+ distinct: false
+ ),
+ Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
+ attribute_name: 'project_id',
+ order_expression: ProjectAuthorization.arel_table[:project_id].asc,
+ nullable: :not_nullable,
+ distinct: false
+ ),
+ Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
+ attribute_name: 'access_level',
+ order_expression: ProjectAuthorization.arel_table[:access_level].asc,
+ nullable: :not_nullable,
+ distinct: true
+ )
+ ])
+
+ scope = ProjectAuthorization.order(order)
+ cursor = {}
+ loop do
+ current_scope = scope.dup
+
+ relation = order.apply_cursor_conditions(current_scope, cursor)
+ first_record = relation.take
+ last_record = relation.offset(BATCH_SIZE).take
+
+ yield first_record, last_record
+
+ break if last_record.blank?
+
+ cursor = order.cursor_attributes_for_node(last_record)
+ end
+ end
+
+ def deduplicate_item(project_id, user_id)
+ auth_records = ProjectAuthorization.where(project_id: project_id, user_id: user_id).order(access_level: :desc).to_a
+
+ ActiveRecord::Base.transaction do
+ # Keep the highest access level and destroy the rest.
+ auth_records[1..].each do |record|
+ ProjectAuthorization
+ .where(
+ project_id: record.project_id,
+ user_id: record.user_id,
+ access_level: record.access_level
+ ).delete_all
+ end
+ end
+ end
+end