diff options
Diffstat (limited to 'db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb')
-rw-r--r-- | db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb b/db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb new file mode 100644 index 00000000000..6fdc30d09c6 --- /dev/null +++ b/db/post_migrate/20210812013042_remove_duplicate_project_authorizations.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +class RemoveDuplicateProjectAuthorizations < ActiveRecord::Migration[6.1] + include Gitlab::Database::MigrationHelpers + + BATCH_SIZE = 10_000 + OLD_INDEX_NAME = 'index_project_authorizations_on_project_id_user_id' + INDEX_NAME = 'index_unique_project_authorizations_on_project_id_user_id' + + class ProjectAuthorization < ActiveRecord::Base + self.table_name = 'project_authorizations' + end + + disable_ddl_transaction! + + def up + batch do |first_record, last_record| + break if first_record.blank? + + # construct a range query where we filter records between the first and last records + rows = ActiveRecord::Base.connection.execute <<~SQL + SELECT user_id, project_id + FROM project_authorizations + WHERE + #{start_condition(first_record)} + #{end_condition(last_record)} + GROUP BY user_id, project_id + HAVING COUNT(*) > 1 + SQL + + rows.each do |row| + deduplicate_item(row['project_id'], row['user_id']) + end + end + + add_concurrent_index :project_authorizations, [:project_id, :user_id], unique: true, name: INDEX_NAME + remove_concurrent_index_by_name :project_authorizations, OLD_INDEX_NAME + end + + def down + add_concurrent_index(:project_authorizations, [:project_id, :user_id], name: OLD_INDEX_NAME) + remove_concurrent_index_by_name(:project_authorizations, INDEX_NAME) + end + + private + + def start_condition(record) + "(user_id, project_id) >= (#{Integer(record.user_id)}, #{Integer(record.project_id)})" + end + + def end_condition(record) + return "" unless record + + "AND (user_id, project_id) <= (#{Integer(record.user_id)}, #{Integer(record.project_id)})" + end + + def batch(&block) + order = Gitlab::Pagination::Keyset::Order.build([ + Gitlab::Pagination::Keyset::ColumnOrderDefinition.new( + attribute_name: 'user_id', + order_expression: ProjectAuthorization.arel_table[:user_id].asc, + nullable: :not_nullable, + distinct: false + ), + Gitlab::Pagination::Keyset::ColumnOrderDefinition.new( + attribute_name: 'project_id', + order_expression: ProjectAuthorization.arel_table[:project_id].asc, + nullable: :not_nullable, + distinct: false + ), + Gitlab::Pagination::Keyset::ColumnOrderDefinition.new( + attribute_name: 'access_level', + order_expression: ProjectAuthorization.arel_table[:access_level].asc, + nullable: :not_nullable, + distinct: true + ) + ]) + + scope = ProjectAuthorization.order(order) + cursor = {} + loop do + current_scope = scope.dup + + relation = order.apply_cursor_conditions(current_scope, cursor) + first_record = relation.take + last_record = relation.offset(BATCH_SIZE).take + + yield first_record, last_record + + break if last_record.blank? + + cursor = order.cursor_attributes_for_node(last_record) + end + end + + def deduplicate_item(project_id, user_id) + auth_records = ProjectAuthorization.where(project_id: project_id, user_id: user_id).order(access_level: :desc).to_a + + ActiveRecord::Base.transaction do + # Keep the highest access level and destroy the rest. + auth_records[1..].each do |record| + ProjectAuthorization + .where( + project_id: record.project_id, + user_id: record.user_id, + access_level: record.access_level + ).delete_all + end + end + end +end |