From d0a08ab888db33437c7df4eb37b5805757a6dce4 Mon Sep 17 00:00:00 2001 From: Gabriel Mazetto Date: Sat, 18 Nov 2017 07:26:07 +0100 Subject: Improve storage migration rake task --- app/models/project.rb | 5 ++- lib/tasks/gitlab/storage.rake | 85 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 73 insertions(+), 17 deletions(-) diff --git a/app/models/project.rb b/app/models/project.rb index 894ded2a9f6..f20c0688bc2 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -272,8 +272,9 @@ class Project < ActiveRecord::Base scope :pending_delete, -> { where(pending_delete: true) } scope :without_deleted, -> { where(pending_delete: false) } - scope :with_hashed_storage, -> { where('storage_version >= 1') } - scope :with_legacy_storage, -> { where(storage_version: [nil, 0]) } + scope :with_storage_feature, ->(feature) { where('storage_version >= :version', version: HASHED_STORAGE_FEATURES[feature]) } + scope :without_storage_feature, ->(feature) { where('storage_version < :version OR storage_version IS NULL', version: HASHED_STORAGE_FEATURES[feature]) } + scope :with_unmigrated_storage, -> { where('storage_version < :version OR storage_version IS NULL', version: LATEST_STORAGE_VERSION) } scope :sorted_by_activity, -> { reorder(last_activity_at: :desc) } scope :sorted_by_stars, -> { reorder('projects.star_count DESC') } diff --git a/lib/tasks/gitlab/storage.rake b/lib/tasks/gitlab/storage.rake index e05be4a3405..8ac73bc8ff2 100644 --- a/lib/tasks/gitlab/storage.rake +++ b/lib/tasks/gitlab/storage.rake @@ -2,10 +2,10 @@ namespace :gitlab do namespace :storage do desc 'GitLab | Storage | Migrate existing projects to Hashed Storage' task migrate_to_hashed: :environment do - legacy_projects_count = Project.with_legacy_storage.count + legacy_projects_count = Project.with_unmigrated_storage.count if legacy_projects_count == 0 - puts 'There are no projects using legacy storage. Nothing to do!' + puts 'There are no projects requiring storage migration. Nothing to do!' next end @@ -23,22 +23,42 @@ namespace :gitlab do desc 'Gitlab | Storage | Summary of existing projects using Legacy Storage' task legacy_projects: :environment do - projects_summary(Project.with_legacy_storage) + relation_summary('projects', Project.without_storage_feature(:repository)) end desc 'Gitlab | Storage | List existing projects using Legacy Storage' task list_legacy_projects: :environment do - projects_list(Project.with_legacy_storage) + projects_list('projects using Legacy Storage', Project.without_storage_feature(:repository)) end desc 'Gitlab | Storage | Summary of existing projects using Hashed Storage' task hashed_projects: :environment do - projects_summary(Project.with_hashed_storage) + relation_summary('projects using Hashed Storage', Project.with_storage_feature(:repository)) end desc 'Gitlab | Storage | List existing projects using Hashed Storage' task list_hashed_projects: :environment do - projects_list(Project.with_hashed_storage) + projects_list('projects using Hashed Storage', Project.with_storage_feature(:repository)) + end + + desc 'Gitlab | Storage | Summary of project attachments using Legacy Storage' + task legacy_attachments: :environment do + relation_summary('attachments using Legacy Storage', legacy_attachments_relation) + end + + desc 'Gitlab | Storage | List existing project attachments using Legacy Storage' + task list_legacy_attachments: :environment do + attachments_list('attachments using Legacy Storage', legacy_attachments_relation) + end + + desc 'Gitlab | Storage | Summary of project attachments using Hashed Storage' + task hashed_attachments: :environment do + relation_summary('attachments using Hashed Storage', hashed_attachments_relation) + end + + desc 'Gitlab | Storage | List existing project attachments using Hashed Storage' + task list_hashed_attachments: :environment do + attachments_list('attachments using Hashed Storage', hashed_attachments_relation) end def batch_size @@ -46,29 +66,43 @@ namespace :gitlab do end def project_id_batches(&block) - Project.with_legacy_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches + Project.with_unmigrated_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches ids = relation.pluck(:id) yield ids.min, ids.max end end - def projects_summary(relation) - projects_count = relation.count - puts "* Found #{projects_count} projects".color(:green) + def legacy_attachments_relation + Upload.joins(<<~SQL).where('projects.storage_version < :version OR projects.storage_version IS NULL', version: Project::HASHED_STORAGE_FEATURES[:attachments]) + JOIN projects + ON (uploads.model_type='Project' AND uploads.model_id=projects.id) + SQL + end + + def hashed_attachments_relation + Upload.joins(<<~SQL).where('projects.storage_version >= :version', version: Project::HASHED_STORAGE_FEATURES[:attachments]) + JOIN projects + ON (uploads.model_type='Project' AND uploads.model_id=projects.id) + SQL + end + + def relation_summary(relation_name, relation) + relation_count = relation.count + puts "* Found #{relation_count} #{relation_name}".color(:green) - projects_count + relation_count end - def projects_list(relation) - projects_count = projects_summary(relation) + def projects_list(relation_name, relation) + relation_count = relation_summary(relation_name, relation) projects = relation.with_route limit = ENV.fetch('LIMIT', 500).to_i - return unless projects_count > 0 + return unless relation_count > 0 - puts " ! Displaying first #{limit} projects..." if projects_count > limit + puts " ! Displaying first #{limit} #{relation_name}..." if relation_count > limit counter = 0 projects.find_in_batches(batch_size: batch_size) do |batch| @@ -81,5 +115,26 @@ namespace :gitlab do end end end + + def attachments_list(relation_name, relation) + relation_count = relation_summary(relation_name, relation) + + limit = ENV.fetch('LIMIT', 500).to_i + + return unless relation_count > 0 + + puts " ! Displaying first #{limit} #{relation_name}..." if relation_count > limit + + counter = 0 + relation.find_in_batches(batch_size: batch_size) do |batch| + batch.each do |upload| + counter += 1 + + puts " - #{upload.path} (id: #{upload.id})".color(:red) + + return if counter >= limit # rubocop:disable Lint/NonLocalExitFromIterator + end + end + end end end -- cgit v1.2.1