summaryrefslogtreecommitdiff
path: root/lib/tasks/gitlab/storage.rake
blob: 6e8bd9078c8d8fab1be537ec3884de1a615a45ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
namespace :gitlab do
  namespace :storage do
    desc 'GitLab | Storage | Migrate existing projects to Hashed Storage'
    task migrate_to_hashed: :environment do
      # Counted up front so the operator sees how much work is about to be queued.
      pending_total = Project.with_unmigrated_storage.count

      if pending_total.zero?
        puts 'There are no projects requiring storage migration. Nothing to do!'
      else
        print "Enqueuing migration of #{pending_total} projects in batches of #{batch_size}"

        # Hand every ID range to StorageMigratorWorker, printing one dot per range.
        project_id_batches do |first_id, last_id|
          StorageMigratorWorker.perform_async(first_id, last_id)

          print '.'
        end

        puts ' Done!'
      end
    end

    desc 'Gitlab | Storage | Summary of existing projects using Legacy Storage'
    task legacy_projects: :environment do
      # Only projects without the hashed :repository feature are counted, so the
      # label says so explicitly; a bare "projects" would read as a grand total.
      relation_summary('projects using Legacy Storage', Project.without_storage_feature(:repository))
    end

    desc 'Gitlab | Storage | List existing projects using Legacy Storage'
    task list_legacy_projects: :environment do
      # Projects that do not have the hashed :repository storage feature
      legacy_projects = Project.without_storage_feature(:repository)

      projects_list('projects using Legacy Storage', legacy_projects)
    end

    desc 'Gitlab | Storage | Summary of existing projects using Hashed Storage'
    task hashed_projects: :environment do
      # Projects that have the hashed :repository storage feature
      hashed_projects = Project.with_storage_feature(:repository)

      relation_summary('projects using Hashed Storage', hashed_projects)
    end

    desc 'Gitlab | Storage | List existing projects using Hashed Storage'
    task list_hashed_projects: :environment do
      # Projects that have the hashed :repository storage feature
      hashed_projects = Project.with_storage_feature(:repository)

      projects_list('projects using Hashed Storage', hashed_projects)
    end

    desc 'Gitlab | Storage | Summary of project attachments using Legacy Storage'
    task legacy_attachments: :environment do
      # Uploads whose project is below the hashed attachments storage version
      legacy_uploads = legacy_attachments_relation

      relation_summary('attachments using Legacy Storage', legacy_uploads)
    end

    desc 'Gitlab | Storage | List existing project attachments using Legacy Storage'
    task list_legacy_attachments: :environment do
      # Uploads whose project is below the hashed attachments storage version
      legacy_uploads = legacy_attachments_relation

      attachments_list('attachments using Legacy Storage', legacy_uploads)
    end

    desc 'Gitlab | Storage | Summary of project attachments using Hashed Storage'
    task hashed_attachments: :environment do
      # Uploads whose project is at or above the hashed attachments storage version
      hashed_uploads = hashed_attachments_relation

      relation_summary('attachments using Hashed Storage', hashed_uploads)
    end

    desc 'Gitlab | Storage | List existing project attachments using Hashed Storage'
    task list_hashed_attachments: :environment do
      # Uploads whose project is at or above the hashed attachments storage version
      hashed_uploads = hashed_attachments_relation

      attachments_list('attachments using Hashed Storage', hashed_uploads)
    end

    # Number of records handled per batch: the BATCH environment variable
    # converted with +to_i+, or 200 when BATCH is not set.
    def batch_size
      ENV.fetch('BATCH') { 200 }.to_i
    end

    # Walks the projects with unmigrated storage in batches of +batch_size+ and
    # yields the lowest and highest project ID found in each batch.
    #
    # The ID_FROM / ID_TO environment variables, when set, are passed through
    # as the +start+ / +finish+ bounds of the batching.
    def project_id_batches(&block)
      unmigrated = Project.with_unmigrated_storage

      unmigrated.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |batch| # rubocop: disable Cop/InBatches
        lowest_id, highest_id = batch.pluck(:id).minmax

        yield lowest_id, highest_id
      end
    end

    # Uploads attached to projects whose storage_version is NULL or lower than
    # the version registered for :attachments in Project::HASHED_STORAGE_FEATURES.
    def legacy_attachments_relation
      # Uploads are polymorphic, so the join has to pin model_type to 'Project'.
      projects_join = <<~SQL
        JOIN projects
          ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
      SQL

      Upload
        .joins(projects_join)
        .where(
          'projects.storage_version < :version OR projects.storage_version IS NULL',
          version: Project::HASHED_STORAGE_FEATURES[:attachments]
        )
    end

    # Uploads attached to projects whose storage_version is at least the
    # version registered for :attachments in Project::HASHED_STORAGE_FEATURES.
    def hashed_attachments_relation
      # Uploads are polymorphic, so the join has to pin model_type to 'Project'.
      projects_join = <<~SQL
        JOIN projects
        ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
      SQL

      Upload
        .joins(projects_join)
        .where(
          'projects.storage_version >= :version',
          version: Project::HASHED_STORAGE_FEATURES[:attachments]
        )
    end

    # Prints a green "* Found <count> <relation_name>" line and returns the count.
    #
    # relation_name - label used in the printed line
    # relation      - object responding to +count+
    def relation_summary(relation_name, relation)
      relation.count.tap do |total|
        puts "* Found #{total} #{relation_name}".color(:green)
      end
    end

    # Prints the summary line for +relation+, then one line per project
    # (full path and id), stopping once LIMIT projects have been printed.
    #
    # relation_name - label used in the printed messages
    # relation      - project relation; must respond to +count+ and +with_route+
    #
    # LIMIT is read from the environment (default 500). A LIMIT of zero or less
    # prints no project lines at all.
    def projects_list(relation_name, relation)
      relation_count = relation_summary(relation_name, relation)
      return unless relation_count > 0

      limit = ENV.fetch('LIMIT', 500).to_i

      puts "  ! Displaying first #{limit} #{relation_name}..." if relation_count > limit

      # Without this guard a LIMIT of 0 would still print the first project,
      # because the counter is only checked after a line has been written.
      return unless limit > 0

      displayed = 0
      # find_each loads records in batches but yields them one at a time, so a
      # plain `break` is enough to stop early.
      relation.with_route.find_each(batch_size: batch_size) do |project|
        puts "  - #{project.full_path} (id: #{project.id})".color(:red)

        displayed += 1
        break if displayed >= limit
      end
    end

    # Prints the summary line for +relation+, then one line per upload
    # (path and id), stopping once LIMIT uploads have been printed.
    #
    # relation_name - label used in the printed messages
    # relation      - upload relation; must respond to +count+
    #
    # LIMIT is read from the environment (default 500). A LIMIT of zero or less
    # prints no upload lines at all.
    def attachments_list(relation_name, relation)
      relation_count = relation_summary(relation_name, relation)
      return unless relation_count > 0

      limit = ENV.fetch('LIMIT', 500).to_i

      puts "  ! Displaying first #{limit} #{relation_name}..." if relation_count > limit

      # Without this guard a LIMIT of 0 would still print the first upload,
      # because the counter is only checked after a line has been written.
      return unless limit > 0

      displayed = 0
      # find_each loads records in batches but yields them one at a time, so a
      # plain `break` is enough to stop early.
      relation.find_each(batch_size: batch_size) do |upload|
        puts "  - #{upload.path} (id: #{upload.id})".color(:red)

        displayed += 1
        break if displayed >= limit
      end
    end
  end
end