diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2021-11-18 13:16:36 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2021-11-18 13:16:36 +0000 |
commit | 311b0269b4eb9839fa63f80c8d7a58f32b8138a0 (patch) | |
tree | 07e7870bca8aed6d61fdcc810731c50d2c40af47 /db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb | |
parent | 27909cef6c4170ed9205afa7426b8d3de47cbb0c (diff) | |
download | gitlab-ce-311b0269b4eb9839fa63f80c8d7a58f32b8138a0.tar.gz |
Add latest changes from gitlab-org/gitlab@14-5-stable-ee v14.5.0-rc42
Diffstat (limited to 'db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb')
-rw-r--r-- | db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb | 261 |
1 files changed, 0 insertions, 261 deletions
diff --git a/db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb b/db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb
deleted file mode 100644
index 8e6ee0f35ca..00000000000
--- a/db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb
+++ /dev/null
@@ -1,261 +0,0 @@
-# frozen_string_literal: true
-
-# rubocop:disable Migration/PreventStrings
-
-# This migration cleans up Projects that were orphaned when their namespace was deleted
-# Instead of deleting them, we:
-# - Find (or create) the Ghost User
-# - Create (if not already exists) a `lost-and-found` group owned by the Ghost User
-# - Find orphaned projects --> namespace_id can not be found in namespaces
-# - Move the orphaned projects to the `lost-and-found` group
-#   (while making them private and setting `archived=true`)
-#
-# On GitLab.com (2020-05-11) this migration will update 66 orphaned projects
-class CleanupProjectsWithMissingNamespace < ActiveRecord::Migration[6.0]
-  include Gitlab::Database::MigrationHelpers
-
-  DOWNTIME = false
-  VISIBILITY_PRIVATE = 0
-  ACCESS_LEVEL_OWNER = 50
-
-  # The batch size of projects to check in each iteration
-  # We expect the selectivity for orphaned projects to be very low:
-  # (66 orphaned projects out of a total 13.6M)
-  # so 10K should be a safe choice
-  BATCH_SIZE = 10000
-
-  disable_ddl_transaction!
-
-  class UserDetail < ActiveRecord::Base
-    self.table_name = 'user_details'
-
-    belongs_to :user, class_name: 'CleanupProjectsWithMissingNamespace::User'
-  end
-
-  class User < ActiveRecord::Base
-    self.table_name = 'users'
-
-    LOST_AND_FOUND_GROUP = 'lost-and-found'
-    USER_TYPE_GHOST = 5
-    DEFAULT_PROJECTS_LIMIT = 100000
-
-    default_value_for :admin, false
-    default_value_for :can_create_group, true # we need this to create the group
-    default_value_for :can_create_team, false
-    default_value_for :project_view, :files
-    default_value_for :notified_of_own_activity, false
-    default_value_for :preferred_language, I18n.default_locale
-
-    has_one :user_detail, class_name: 'CleanupProjectsWithMissingNamespace::UserDetail'
-    has_one :namespace, -> { where(type: nil) },
-            foreign_key: :owner_id, inverse_of: :owner, autosave: true,
-            class_name: 'CleanupProjectsWithMissingNamespace::Namespace'
-
-    before_save :ensure_namespace_correct
-    before_save :ensure_bio_is_assigned_to_user_details, if: :bio_changed?
-
-    enum project_view: { readme: 0, activity: 1, files: 2 }
-
-    def ensure_namespace_correct
-      if namespace
-        namespace.path = username if username_changed?
-        namespace.name = name if name_changed?
-      else
-        build_namespace(path: username, name: name)
-      end
-    end
-
-    def ensure_bio_is_assigned_to_user_details
-      user_detail.bio = bio.to_s[0...255]
-    end
-
-    def user_detail
-      super.presence || build_user_detail
-    end
-
-    # Return (or create if necessary) the `lost-and-found` group
-    def lost_and_found_group
-      existing_lost_and_found_group || Group.create_unique_group(self, LOST_AND_FOUND_GROUP)
-    end
-
-    def existing_lost_and_found_group
-      # There should only be one Group for User Ghost starting with LOST_AND_FOUND_GROUP
-      Group
-        .joins('INNER JOIN members ON namespaces.id = members.source_id')
-        .where(namespaces: { type: 'Group' })
-        .where(members: { type: 'GroupMember' })
-        .where(members: { source_type: 'Namespace' })
-        .where(members: { user_id: self.id })
-        .where(members: { requested_at: nil })
-        .where(members: { access_level: ACCESS_LEVEL_OWNER })
-        .find_by(Group.arel_table[:name].matches("#{LOST_AND_FOUND_GROUP}%"))
-    end
-
-    class << self
-      # Return (or create if necessary) the ghost user
-      def ghost
-        email = 'ghost%s@example.com'
-
-        unique_internal(where(user_type: USER_TYPE_GHOST), 'ghost', email) do |u|
-          u.bio = _('This is a "Ghost User", created to hold all issues authored by users that have since been deleted. This user cannot be removed.')
-          u.name = 'Ghost User'
-        end
-      end
-
-      def unique_internal(scope, username, email_pattern, &block)
-        scope.first || create_unique_internal(scope, username, email_pattern, &block)
-      end
-
-      def create_unique_internal(scope, username, email_pattern, &creation_block)
-        # Since we only want a single one of these in an instance, we use an
-        # exclusive lease to ensure that this block is never run concurrently.
-        lease_key = "user:unique_internal:#{username}"
-        lease = Gitlab::ExclusiveLease.new(lease_key, timeout: 1.minute.to_i)
-
-        until uuid = lease.try_obtain
-          # Keep trying until we obtain the lease. To prevent hammering Redis too
-          # much we'll wait for a bit between retries.
-          sleep(1)
-        end
-
-        # Recheck if the user is already present. One might have been
-        # added between the time we last checked (first line of this method)
-        # and the time we acquired the lock.
-        existing_user = uncached { scope.first }
-        return existing_user if existing_user.present?
-
-        uniquify = Uniquify.new
-
-        username = uniquify.string(username) { |s| User.find_by_username(s) }
-
-        email = uniquify.string(-> (n) { Kernel.sprintf(email_pattern, n) }) do |s|
-          User.find_by_email(s)
-        end
-
-        User.create!(
-          username: username,
-          email: email,
-          user_type: USER_TYPE_GHOST,
-          projects_limit: DEFAULT_PROJECTS_LIMIT,
-          state: :active,
-          &creation_block
-        )
-      ensure
-        Gitlab::ExclusiveLease.cancel(lease_key, uuid)
-      end
-    end
-  end
-
-  class Namespace < ActiveRecord::Base
-    self.table_name = 'namespaces'
-
-    belongs_to :owner, class_name: 'CleanupProjectsWithMissingNamespace::User'
-  end
-
-  class Group < Namespace
-    # Disable STI to allow us to manually set "type = 'Group'"
-    # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::Group"
-    self.inheritance_column = :_type_disabled
-
-    def self.create_unique_group(user, group_name)
-      # 'lost-and-found' may be already defined, find a unique one
-      group_name = Uniquify.new.string(group_name) do |str|
-        Group.where(parent_id: nil, name: str).exists?
-      end
-
-      group = Group.create!(
-        name: group_name,
-        path: group_name,
-        type: 'Group',
-        description: 'Group to store orphaned projects',
-        visibility_level: VISIBILITY_PRIVATE
-      )
-
-      # No need to create a route for the lost-and-found group
-
-      GroupMember.add_user(group, user, ACCESS_LEVEL_OWNER)
-
-      group
-    end
-  end
-
-  class Member < ActiveRecord::Base
-    self.table_name = 'members'
-  end
-
-  class GroupMember < Member
-    NOTIFICATION_SETTING_GLOBAL = 3
-
-    # Disable STI to allow us to manually set "type = 'GroupMember'"
-    # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::GroupMember"
-    self.inheritance_column = :_type_disabled
-
-    def self.add_user(source, user, access_level)
-      GroupMember.create!(
-        type: 'GroupMember',
-        source_id: source.id,
-        user_id: user.id,
-        source_type: 'Namespace',
-        access_level: access_level,
-        notification_level: NOTIFICATION_SETTING_GLOBAL
-      )
-    end
-  end
-
-  class Project < ActiveRecord::Base
-    self.table_name = 'projects'
-
-    include ::EachBatch
-
-    def self.without_namespace
-      where(
-        'NOT EXISTS (
-          SELECT 1
-          FROM namespaces
-          WHERE projects.namespace_id = namespaces.id
-        )'
-      )
-    end
-  end
-
-  def up
-    # Reset the column information of all the models that update the database
-    # to ensure the Active Record's knowledge of the table structure is current
-    User.reset_column_information
-    Namespace.reset_column_information
-    Member.reset_column_information
-    Project.reset_column_information
-
-    # Find or Create the ghost user
-    ghost_user = User.ghost
-
-    # Find or Create the `lost-and-found`
-    lost_and_found = ghost_user.lost_and_found_group
-
-    # With BATCH_SIZE=10K and projects.count=13.6M
-    # ~1360 iterations will be run:
-    # - each requires on average ~160ms for relation.without_namespace
-    # - worst case scenario is that 66 of those batches will trigger an update (~200ms each)
-    # In general, we expect less than 5% (=66/13.6M x 10K) to trigger an update
-    # Expected total run time: ~235 seconds (== 220 seconds + 14 seconds)
-    Project.each_batch(of: BATCH_SIZE) do |relation|
-      relation.without_namespace.update_all <<~SQL
-        namespace_id = #{lost_and_found.id},
-        archived = TRUE,
-        visibility_level = #{VISIBILITY_PRIVATE},
-
-        -- Names are expected to be unique inside their namespace
-        -- (uniqueness validation on namespace_id, name)
-        -- Attach the id to the name and path to make sure that they are unique
-        name = name || '_' || id::text,
-        path = path || '_' || id::text
-      SQL
-    end
-  end
-
-  def down
-    # no-op: the original state for those projects was inconsistent
-    # Also, the original namespace_id for each project is lost during the update
-  end
-end
-# rubocop:enable Migration/PreventStrings