summaryrefslogtreecommitdiff
path: root/db/post_migrate/20171128214150_schedule_populate_merge_request_metrics_with_events_data.rb
diff options
context:
space:
mode:
Diffstat (limited to 'db/post_migrate/20171128214150_schedule_populate_merge_request_metrics_with_events_data.rb')
-rw-r--r--db/post_migrate/20171128214150_schedule_populate_merge_request_metrics_with_events_data.rb68
1 files changed, 68 insertions, 0 deletions
diff --git a/db/post_migrate/20171128214150_schedule_populate_merge_request_metrics_with_events_data.rb b/db/post_migrate/20171128214150_schedule_populate_merge_request_metrics_with_events_data.rb
new file mode 100644
index 00000000000..547cc68e10e
--- /dev/null
+++ b/db/post_migrate/20171128214150_schedule_populate_merge_request_metrics_with_events_data.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+# rubocop:disable GitlabSecurity/SqlInjection
+
+class SchedulePopulateMergeRequestMetricsWithEventsData < ActiveRecord::Migration
+ DOWNTIME = false
+ BATCH_SIZE = 10_000
+ MIGRATION = 'PopulateMergeRequestMetricsWithEventsData'
+
+ disable_ddl_transaction!
+
+ class MergeRequest < ActiveRecord::Base
+ self.table_name = 'merge_requests'
+
+ include ::EachBatch
+ end
+
+ def up
+ merge_requests = MergeRequest.where("id IN (#{updatable_merge_requests_union_sql})").reorder(:id)
+
+ say 'Scheduling `PopulateMergeRequestMetricsWithEventsData` jobs'
+ # It will update around 4_000_000 records in batches of 10_000 merge
+ # requests (running between 10 minutes) and should take around 66 hours to complete.
+ # Apparently, production PostgreSQL is able to vacuum 10k-20k dead_tuples by
+ # minute, and at maximum, each of these jobs should UPDATE 20k records.
+ #
+ # More information about the updates in `PopulateMergeRequestMetricsWithEventsData` class.
+ #
+ merge_requests.each_batch(of: BATCH_SIZE) do |relation, index|
+ range = relation.pluck('MIN(id)', 'MAX(id)').first
+
+ BackgroundMigrationWorker.perform_in(index * 10.minutes, MIGRATION, range)
+ end
+ end
+
+ def down
+ execute "update merge_request_metrics set latest_closed_at = null"
+ execute "update merge_request_metrics set latest_closed_by_id = null"
+ execute "update merge_request_metrics set merged_by_id = null"
+ end
+
+ private
+
+ # On staging:
+ # Planning time: 0.682 ms
+ # Execution time: 22033.158 ms
+ #
+ def updatable_merge_requests_union_sql
+ metrics_not_exists_clause =
+ 'NOT EXISTS (SELECT 1 FROM merge_request_metrics WHERE merge_request_metrics.merge_request_id = merge_requests.id)'
+
+ without_metrics_data = <<-SQL.strip_heredoc
+ merge_request_metrics.merged_by_id IS NULL OR
+ merge_request_metrics.latest_closed_by_id IS NULL OR
+ merge_request_metrics.latest_closed_at IS NULL
+ SQL
+
+ mrs_without_metrics_record = MergeRequest
+ .where(metrics_not_exists_clause)
+ .select(:id)
+
+ mrs_without_events_data = MergeRequest
+ .joins('INNER JOIN merge_request_metrics ON merge_requests.id = merge_request_metrics.merge_request_id')
+ .where(without_metrics_data)
+ .select(:id)
+
+ Gitlab::SQL::Union.new([mrs_without_metrics_record, mrs_without_events_data]).to_sql
+ end
+end