summaryrefslogtreecommitdiff
path: root/db/post_migrate/20171207150343_remove_soft_removed_objects.rb
blob: 542cfb42fdcd07727ed61dbbc9dc36365c7ba482 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.

class RemoveSoftRemovedObjects < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  # Set this constant to true if this migration requires downtime.
  DOWNTIME = false

  disable_ddl_transaction!

  # Mixin that gives the including model a `soft_removed` scope.
  module SoftRemoved
    extend ActiveSupport::Concern

    included do
      # A row counts as soft removed once its `deleted_at` column is set.
      scope(:soft_removed, -> { where('deleted_at IS NOT NULL') })
    end
  end

  # Migration-local model for the `users` table. Within this migration it is
  # only queried by `id_for_admin_user` to find the ID of an admin user.
  class User < ActiveRecord::Base
    include EachBatch

    self.table_name = 'users'
  end

  # Migration-local model for the `issues` table. It is listed in MODELS, so
  # its soft removed rows are deleted in batches by
  # `remove_simple_soft_removed_rows`.
  class Issue < ActiveRecord::Base
    include SoftRemoved
    include EachBatch

    self.table_name = 'issues'
  end

  # Migration-local model for the `merge_requests` table. It is listed in
  # MODELS, so its soft removed rows are deleted in batches by
  # `remove_simple_soft_removed_rows`.
  class MergeRequest < ActiveRecord::Base
    include SoftRemoved
    include EachBatch

    self.table_name = 'merge_requests'
  end

  # Migration-local model for the `namespaces` table.
  class Namespace < ActiveRecord::Base
    include EachBatch
    include SoftRemoved

    self.table_name = 'namespaces'

    # Soft removed rows with a NULL `type`; this migration treats them as
    # personal namespaces.
    scope(:soft_removed_personal, -> { soft_removed.where(type: nil) })

    # Soft removed rows whose `type` is 'Group'.
    scope(:soft_removed_group, -> { soft_removed.where(type: 'Group') })
  end

  # Migration-local model for the `routes` table. `remove_personal_routes`
  # uses it to delete routes that point at soft removed personal namespaces.
  class Route < ActiveRecord::Base
    include SoftRemoved
    include EachBatch

    self.table_name = 'routes'
  end

  # Migration-local model for the `projects` table.
  #
  # NOTE(review): this model is not referenced by any method in the visible
  # part of this file.
  class Project < ActiveRecord::Base
    include SoftRemoved
    include EachBatch

    self.table_name = 'projects'
  end

  # Migration-local model for the `ci_pipeline_schedules` table. It is listed
  # in MODELS, so its soft removed rows are deleted in batches by
  # `remove_simple_soft_removed_rows`.
  class CiPipelineSchedule < ActiveRecord::Base
    include SoftRemoved
    include EachBatch

    self.table_name = 'ci_pipeline_schedules'
  end

  # Migration-local model for the `ci_triggers` table. It is listed in MODELS,
  # so its soft removed rows are deleted in batches by
  # `remove_simple_soft_removed_rows`.
  class CiTrigger < ActiveRecord::Base
    include SoftRemoved
    include EachBatch

    self.table_name = 'ci_triggers'
  end

  # Models handled by `remove_simple_soft_removed_rows` and by the temporary
  # index helpers: their soft removed rows are deleted directly.
  MODELS = [
    Issue,
    MergeRequest,
    CiPipelineSchedule,
    CiTrigger
  ].freeze

  # Runs the whole cleanup with the statement timeout disabled.
  def up
    disable_statement_timeout

    # Namespace related cleanup, in this exact order: the routes are removed
    # first because `remove_personal_routes` finds them through the soft
    # removed namespace rows that `remove_personal_namespaces` deletes.
    [:remove_personal_routes, :remove_personal_namespaces, :remove_group_namespaces].each do |step|
      send(step)
    end

    # Finally the tables listed in MODELS.
    remove_simple_soft_removed_rows
  end

  # Intentionally a no-op: the data removed by this migration can't be
  # restored in an automated way.
  def down
  end

  # Deletes the soft removed rows of every model in MODELS, batch by batch.
  #
  # The temporary indexes are created first; the `ensure` clause removes them
  # again whether or not the deletion succeeds.
  def remove_simple_soft_removed_rows
    create_temporary_indexes

    MODELS.each do |model|
      description = "Removing soft removed rows from #{model.table_name}"

      say_with_time(description) do
        # The batch number yielded by `each_batch` is not needed here.
        model.soft_removed.each_batch { |rows, _batch_number| rows.delete_all }
      end
    end
  ensure
    remove_temporary_indexes
  end

  # Adds a partial index on (deleted_at, id), limited to rows where
  # `deleted_at IS NOT NULL`, to the table of every model in MODELS. Tables
  # that already have the index are skipped.
  #
  # Without this index the removal process can take a very long time. For
  # example, getting the next ID of a batch for the `issues` table in staging
  # would take between 15 and 20 seconds.
  def create_temporary_indexes
    MODELS.each do |model|
      next if temporary_index_exists?(model)

      name = temporary_index_name_for(model)

      say_with_time("Creating temporary index #{name}") do
        add_concurrent_index(model.table_name, [:deleted_at, :id],
                             name: name, where: 'deleted_at IS NOT NULL')
      end
    end
  end

  # Drops the temporary index from the table of every model in MODELS.
  # Tables on which the index does not exist are skipped.
  def remove_temporary_indexes
    MODELS.each do |model|
      next unless temporary_index_exists?(model)

      name = temporary_index_name_for(model)

      say_with_time("Removing temporary index #{name}") do
        remove_concurrent_index_by_name(model.table_name, name)
      end
    end
  end

  # Returns the name of the temporary index for the given model's table, in
  # the form "index_on_<table name>_tmp".
  def temporary_index_name_for(model)
    table = model.table_name

    "index_on_#{table}_tmp"
  end

  # Reports whether the temporary (deleted_at, id) index, identified by the
  # name from `temporary_index_name_for`, exists on the given model's table.
  def temporary_index_exists?(model)
    index_exists?(model.table_name, [:deleted_at, :id], name: temporary_index_name_for(model))
  end

  # Deletes soft removed personal namespaces in batches.
  #
  # Some personal namespaces are left behind in case of GitLab.com. In these
  # cases the associated data such as the projects and users has already been
  # removed.
  def remove_personal_namespaces
    leftovers = Namespace.soft_removed_personal

    leftovers.each_batch { |rows| rows.delete_all }
  end

  # Schedules the removal of every soft removed group, in batches of 10.
  #
  # Left over groups can't be easily removed because we may also need to
  # remove memberships, repositories, and other associated data. As a result
  # we'll just schedule a Sidekiq job to remove these.
  #
  # As of January 5th, 2018 there are 36 groups that will be removed using
  # this code.
  def remove_group_namespaces
    groups = Namespace.select(:id).soft_removed_group

    groups.each_batch(of: 10) do |group_batch, batch_number|
      # We need the ID of an admin user as the owners of the group may no
      # longer exist (or might not even be set in `namespaces.owner_id`).
      # The lookup stays inside the loop so that nothing is raised when there
      # are no groups to remove.
      admin_id = id_for_admin_user

      # All groups of one batch share a delay; it grows by 5 minutes per batch.
      delay = batch_number * 5.minutes

      group_batch.each { |group| schedule_group_removal(delay, group.id, admin_id) }
    end
  end

  # Removes the group `group_id` on behalf of the user `user_id`.
  #
  # When `migrate_inline?` is true the worker runs immediately in this process
  # and `delay` is ignored; otherwise the job is enqueued to run after `delay`.
  def schedule_group_removal(delay, group_id, user_id)
    return GroupDestroyWorker.new.perform(group_id, user_id) if migrate_inline?

    GroupDestroyWorker.perform_in(delay, group_id, user_id)
  end

  # Deletes, in batches, every route whose source is a soft removed personal
  # namespace (a `namespaces` row with a NULL `type`).
  def remove_personal_routes
    # Correlated subquery: it refers to the `routes` row of the outer query.
    owning_namespace = Namespace
      .select(1)
      .soft_removed
      .where('namespaces.type IS NULL')
      .where('routes.source_type = ?', 'Namespace')
      .where('routes.source_id = namespaces.id')

    orphaned_routes = Route.where('EXISTS (?)', owning_namespace)

    orphaned_routes.each_batch { |rows| rows.delete_all }
  end

  # Returns the ID of a user that has `admin` set to true. The value is
  # memoized in `@id_for_admin_user`, so after a successful lookup the query
  # is not repeated.
  #
  # Raises a RuntimeError when no such user exists.
  def id_for_admin_user
    @id_for_admin_user ||= begin
      admin_id = User.where(admin: true).limit(1).pluck(:id).first

      unless admin_id
        raise 'Can not remove soft removed groups as no admin user exists. ' \
          'Please make sure at least one user with `admin` set to TRUE exists before proceeding.'
      end

      admin_id
    end
  end

  # True in the Rails test and development environments. In that case
  # `schedule_group_removal` runs the worker directly instead of enqueueing it.
  def migrate_inline?
    environment = Rails.env

    environment.test? || environment.development?
  end
end