summaryrefslogtreecommitdiff
path: root/db/post_migrate/20171128214150_schedule_populate_merge_request_metrics_with_events_data.rb
blob: 547cc68e10e8297fc2d629e1c87accc65c974aed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# frozen_string_literal: true
# rubocop:disable GitlabSecurity/SqlInjection

class SchedulePopulateMergeRequestMetricsWithEventsData < ActiveRecord::Migration
  DOWNTIME = false
  BATCH_SIZE = 10_000
  MIGRATION = 'PopulateMergeRequestMetricsWithEventsData'

  disable_ddl_transaction!

  class MergeRequest < ActiveRecord::Base
    self.table_name = 'merge_requests'

    include ::EachBatch
  end

  def up
    merge_requests = MergeRequest.where("id IN (#{updatable_merge_requests_union_sql})").reorder(:id)

    say 'Scheduling `PopulateMergeRequestMetricsWithEventsData` jobs'
    # It will update around 4_000_000 records in batches of 10_000 merge
    # requests (running between 10 minutes) and should take around 66 hours to complete.
    # Apparently, production PostgreSQL is able to vacuum 10k-20k dead_tuples by
    # minute, and at maximum, each of these jobs should UPDATE 20k records.
    #
    # More information about the updates in `PopulateMergeRequestMetricsWithEventsData` class.
    #
    merge_requests.each_batch(of: BATCH_SIZE) do |relation, index|
      range = relation.pluck('MIN(id)', 'MAX(id)').first

      BackgroundMigrationWorker.perform_in(index * 10.minutes, MIGRATION, range)
    end
  end

  def down
    execute "update merge_request_metrics set latest_closed_at = null"
    execute "update merge_request_metrics set latest_closed_by_id = null"
    execute "update merge_request_metrics set merged_by_id = null"
  end

  private

  # On staging:
  # Planning time: 0.682 ms
  # Execution time: 22033.158 ms
  #
  def updatable_merge_requests_union_sql
    metrics_not_exists_clause =
      'NOT EXISTS (SELECT 1 FROM merge_request_metrics WHERE merge_request_metrics.merge_request_id = merge_requests.id)'

    without_metrics_data = <<-SQL.strip_heredoc
      merge_request_metrics.merged_by_id IS NULL OR
      merge_request_metrics.latest_closed_by_id IS NULL OR
      merge_request_metrics.latest_closed_at IS NULL
    SQL

    mrs_without_metrics_record = MergeRequest
      .where(metrics_not_exists_clause)
      .select(:id)

    mrs_without_events_data = MergeRequest
      .joins('INNER JOIN merge_request_metrics ON merge_requests.id = merge_request_metrics.merge_request_id')
      .where(without_metrics_data)
      .select(:id)

    Gitlab::SQL::Union.new([mrs_without_metrics_record, mrs_without_events_data]).to_sql
  end
end