diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2020-05-20 14:34:42 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2020-05-20 14:34:42 +0000 |
commit | 9f46488805e86b1bc341ea1620b866016c2ce5ed (patch) | |
tree | f9748c7e287041e37d6da49e0a29c9511dc34768 /db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb | |
parent | dfc92d081ea0332d69c8aca2f0e745cb48ae5e6d (diff) | |
download | gitlab-ce-9f46488805e86b1bc341ea1620b866016c2ce5ed.tar.gz |
Add latest changes from gitlab-org/gitlab@13-0-stable-ee
Diffstat (limited to 'db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb')
-rw-r--r-- | db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb | 263 |
1 files changed, 263 insertions, 0 deletions
# frozen_string_literal: true

# db/post_migrate/20200511083541_cleanup_projects_with_missing_namespace.rb

# rubocop:disable Migration/PreventStrings

# This migration cleans up Projects that were orphaned when their namespace was deleted
# Instead of deleting them, we:
# - Find (or create) the Ghost User
# - Create (if not already exists) a `lost-and-found` group owned by the Ghost User
# - Find orphaned projects --> namespace_id can not be found in namespaces
# - Move the orphaned projects to the `lost-and-found` group
#   (while making them private and setting `archived=true`)
#
# On GitLab.com (2020-05-11) this migration will update 66 orphaned projects
class CleanupProjectsWithMissingNamespace < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  VISIBILITY_PRIVATE = 0
  ACCESS_LEVEL_OWNER = 50

  # The batch size of projects to check in each iteration
  # We expect the selectivity for orphaned projects to be very low:
  # (66 orphaned projects out of a total 13.6M)
  # so 10K should be a safe choice
  BATCH_SIZE = 10000

  disable_ddl_transaction!

  # Migration-local model over the `user_details` table.
  # Receives the ghost user's bio (see User#ensure_bio_is_assigned_to_user_details).
  class UserDetail < ActiveRecord::Base
    self.table_name = 'user_details'

    belongs_to :user, class_name: 'CleanupProjectsWithMissingNamespace::User'
  end

  # Migration-local model over the `users` table, carrying just enough
  # behavior to find or create the ghost user and its `lost-and-found` group.
  class User < ActiveRecord::Base
    self.table_name = 'users'

    LOST_AND_FOUND_GROUP = 'lost-and-found'
    USER_TYPE_GHOST = 5
    DEFAULT_PROJECTS_LIMIT = 100000

    # NOTE(review): `default_value_for` is not defined in this file; presumably
    # it is provided by a gem loaded by the application -- confirm.
    default_value_for :admin, false
    default_value_for :can_create_group, true # we need this to create the group
    default_value_for :can_create_team, false
    default_value_for :project_view, :files
    default_value_for :notified_of_own_activity, false
    default_value_for :preferred_language, I18n.default_locale

    has_one :user_detail, class_name: 'CleanupProjectsWithMissingNamespace::UserDetail'
    has_one :namespace, -> { where(type: nil) },
            foreign_key: :owner_id, inverse_of: :owner, autosave: true,
            class_name: 'CleanupProjectsWithMissingNamespace::Namespace'

    before_save :ensure_namespace_correct
    before_save :ensure_bio_is_assigned_to_user_details, if: :bio_changed?

    enum project_view: { readme: 0, activity: 1, files: 2 }

    # before_save callback.
    # Keeps the associated namespace's path/name aligned with the user's
    # username/name, or builds the namespace when the user has none.
    def ensure_namespace_correct
      if namespace
        namespace.path = username if username_changed?
        namespace.name = name if name_changed?
      else
        build_namespace(path: username, name: name)
      end
    end

    # before_save callback (only when `bio` changed).
    # Copies the first 255 characters of `bio` onto the user_detail record,
    # unless the :migrate_bio_to_user_details feature is reported as disabled.
    def ensure_bio_is_assigned_to_user_details
      return if Feature.disabled?(:migrate_bio_to_user_details, default_enabled: true)

      user_detail.bio = bio.to_s[0...255]
    end

    # Return the associated user_detail, building a new one when none is present
    def user_detail
      super.presence || build_user_detail
    end

    # Return (or create if necessary) the `lost-and-found` group
    def lost_and_found_group
      existing_lost_and_found_group || Group.create_unique_group(self, LOST_AND_FOUND_GROUP)
    end

    # Return the group whose name starts with LOST_AND_FOUND_GROUP and in which
    # this user holds an ACCESS_LEVEL_OWNER GroupMember row with no
    # `requested_at` value, or nil when there is no such group.
    def existing_lost_and_found_group
      # There should only be one Group for User Ghost starting with LOST_AND_FOUND_GROUP
      Group
        .joins('INNER JOIN members ON namespaces.id = members.source_id')
        .where('namespaces.type = ?', 'Group')
        .where('members.type = ?', 'GroupMember')
        .where('members.source_type = ?', 'Namespace')
        .where('members.user_id = ?', self.id)
        .where('members.requested_at IS NULL')
        .where('members.access_level = ?', ACCESS_LEVEL_OWNER)
        .find_by(Group.arel_table[:name].matches("#{LOST_AND_FOUND_GROUP}%"))
    end

    class << self
      # Return (or create if necessary) the ghost user
      def ghost
        email = 'ghost%s@example.com'

        unique_internal(where(user_type: USER_TYPE_GHOST), 'ghost', email) do |u|
          u.bio = _('This is a "Ghost User", created to hold all issues authored by users that have since been deleted. This user cannot be removed.')
          u.name = 'Ghost User'
        end
      end

      # Return the first user in `scope`, or create one when the scope is empty.
      # The block is forwarded to the creation step to customize the new user.
      def unique_internal(scope, username, email_pattern, &block)
        scope.first || create_unique_internal(scope, username, email_pattern, &block)
      end

      # Create the internal user for `scope` while holding an exclusive lease.
      #
      # scope          - relation identifying the internal user
      # username       - preferred username; passed through Uniquify
      # email_pattern  - sprintf pattern (one `%s`) used to derive the email
      # creation_block - forwarded to `User.create!`
      #
      # Returns the user found on the recheck, or the newly created one.
      # `User.create!` raises when the record cannot be saved.
      def create_unique_internal(scope, username, email_pattern, &creation_block)
        # Since we only want a single one of these in an instance, we use an
        # exclusive lease to ensure that this block is never run concurrently.
        lease_key = "user:unique_internal:#{username}"
        lease = Gitlab::ExclusiveLease.new(lease_key, timeout: 1.minute.to_i)

        until uuid = lease.try_obtain
          # Keep trying until we obtain the lease. To prevent hammering Redis too
          # much we'll wait for a bit between retries.
          sleep(1)
        end

        # Recheck if the user is already present. One might have been
        # added between the time we last checked (first line of this method)
        # and the time we acquired the lock.
        existing_user = uncached { scope.first }
        return existing_user if existing_user.present?

        # NOTE(review): Uniquify is defined outside this file; it is assumed to
        # keep generating candidates until the block returns a falsy value.
        uniquify = Uniquify.new

        username = uniquify.string(username) { |s| User.find_by_username(s) }

        email = uniquify.string(-> (n) { Kernel.sprintf(email_pattern, n) }) do |s|
          User.find_by_email(s)
        end

        User.create!(
          username: username,
          email: email,
          user_type: USER_TYPE_GHOST,
          projects_limit: DEFAULT_PROJECTS_LIMIT,
          state: :active,
          &creation_block
        )
      ensure
        # Runs on every exit path, including the early return above
        Gitlab::ExclusiveLease.cancel(lease_key, uuid)
      end
    end
  end

  # Migration-local model over the `namespaces` table
  class Namespace < ActiveRecord::Base
    self.table_name = 'namespaces'

    belongs_to :owner, class_name: 'CleanupProjectsWithMissingNamespace::User'
  end

  # Migration-local model for rows of `namespaces` written with type = 'Group'
  class Group < Namespace
    # Disable STI to allow us to manually set "type = 'Group'"
    # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::Group"
    self.inheritance_column = :_type_disabled

    # Create a private top-level group and make `user` its owner.
    #
    # user       - the user that is added as ACCESS_LEVEL_OWNER member
    # group_name - preferred name; also used as the group's path
    #
    # Returns the created Group. Raises if either record cannot be created.
    def self.create_unique_group(user, group_name)
      # 'lost-and-found' may be already defined, find a unique one.
      # The chosen string is written to both `name` and `path` below, so a
      # candidate is rejected when either column already holds it on a
      # top-level row (checking `name` alone would let a duplicate path in).
      top_level = Group.where(parent_id: nil)

      group_name = Uniquify.new.string(group_name) do |str|
        top_level.where(name: str).or(top_level.where(path: str)).exists?
      end

      group = Group.create!(
        name: group_name,
        path: group_name,
        type: 'Group',
        description: 'Group to store orphaned projects',
        visibility_level: VISIBILITY_PRIVATE
      )

      # No need to create a route for the lost-and-found group

      GroupMember.add_user(group, user, ACCESS_LEVEL_OWNER)

      group
    end
  end

  # Migration-local model over the `members` table
  class Member < ActiveRecord::Base
    self.table_name = 'members'
  end

  # Migration-local model for rows of `members` written with type = 'GroupMember'
  class GroupMember < Member
    NOTIFICATION_SETTING_GLOBAL = 3

    # Disable STI to allow us to manually set "type = 'GroupMember'"
    # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::GroupMember"
    self.inheritance_column = :_type_disabled

    # Insert a membership row linking `user` to `source` (a namespace row)
    # with the given access level. Raises if the row cannot be created.
    def self.add_user(source, user, access_level)
      GroupMember.create!(
        type: 'GroupMember',
        source_id: source.id,
        user_id: user.id,
        source_type: 'Namespace',
        access_level: access_level,
        notification_level: NOTIFICATION_SETTING_GLOBAL
      )
    end
  end

  # Migration-local model over the `projects` table
  class Project < ActiveRecord::Base
    self.table_name = 'projects'

    include ::EachBatch

    # Scope: projects whose namespace_id has no matching row in `namespaces`
    def self.without_namespace
      where(
        'NOT EXISTS (
          SELECT 1
          FROM namespaces
          WHERE projects.namespace_id = namespaces.id
        )'
      )
    end
  end

  # Move every orphaned project into the ghost user's `lost-and-found` group,
  # archiving it, making it private and suffixing its name and path with its id.
  def up
    # Reset the column information of all the models that update the database
    # to ensure the Active Record's knowledge of the table structure is current
    User.reset_column_information
    Namespace.reset_column_information
    Member.reset_column_information
    Project.reset_column_information

    # Find or Create the ghost user
    ghost_user = User.ghost

    # Find or Create the `lost-and-found`
    lost_and_found = ghost_user.lost_and_found_group

    # With BATCH_SIZE=10K and projects.count=13.6M
    # ~1360 iterations will be run:
    # - each requires on average ~160ms for relation.without_namespace
    # - worst case scenario is that 66 of those batches will trigger an update (~200ms each)
    # In general, we expect less than 5% (=66/13.6M x 10K) to trigger an update
    # Expected total run time: ~235 seconds (== 220 seconds + 14 seconds)
    Project.each_batch(of: BATCH_SIZE) do |relation|
      # The SQL below must stay multi-line: each `--` comment runs to the end
      # of its line, so joining the lines would comment out the assignments.
      relation.without_namespace.update_all <<~SQL
        namespace_id = #{lost_and_found.id},
        archived = TRUE,
        visibility_level = #{VISIBILITY_PRIVATE},

        -- Names are expected to be unique inside their namespace
        -- (uniqueness validation on namespace_id, name)
        -- Attach the id to the name and path to make sure that they are unique
        name = name || '_' || id,
        path = path || '_' || id
      SQL
    end
  end

  def down
    # no-op: the original state for those projects was inconsistent
    # Also, the original namespace_id for each project is lost during the update
  end
end
# rubocop:enable Migration/PreventStrings