summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--changelogs/unreleased/osw-update-mr-metrics-with-events-data.yml5
-rw-r--r--db/post_migrate/20181204154019_populate_mr_metrics_with_events_data.rb38
-rw-r--r--db/schema.rb2
-rw-r--r--lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved.rb99
-rw-r--r--spec/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved_spec.rb57
-rw-r--r--spec/migrations/populate_mr_metrics_with_events_data_spec.rb47
6 files changed, 247 insertions, 1 deletions
diff --git a/changelogs/unreleased/osw-update-mr-metrics-with-events-data.yml b/changelogs/unreleased/osw-update-mr-metrics-with-events-data.yml
new file mode 100644
index 00000000000..09a10a86adc
--- /dev/null
+++ b/changelogs/unreleased/osw-update-mr-metrics-with-events-data.yml
@@ -0,0 +1,5 @@
+---
+title: Populate MR metrics with events table information (migration)
+merge_request: 23564
+author:
+type: performance
diff --git a/db/post_migrate/20181204154019_populate_mr_metrics_with_events_data.rb b/db/post_migrate/20181204154019_populate_mr_metrics_with_events_data.rb
new file mode 100644
index 00000000000..1e43e3dd790
--- /dev/null
+++ b/db/post_migrate/20181204154019_populate_mr_metrics_with_events_data.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class PopulateMrMetricsWithEventsData < ActiveRecord::Migration[4.2]
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+ BATCH_SIZE = 10_000
+ MIGRATION = 'PopulateMergeRequestMetricsWithEventsDataImproved'
+ PREVIOUS_MIGRATION = 'PopulateMergeRequestMetricsWithEventsData'
+
+ disable_ddl_transaction!
+
+ def up
+ # Perform any ongoing background migration that might still be running from
+ # previous try (see https://gitlab.com/gitlab-org/gitlab-ce/issues/47676).
+ Gitlab::BackgroundMigration.steal(PREVIOUS_MIGRATION)
+
+ say 'Scheduling `PopulateMergeRequestMetricsWithEventsData` jobs'
+ # It will update around 4_000_000 records in batches of 10_000 merge
+ # requests (running between 5 minutes) and should take around 53 hours to complete.
+ # Apparently, production PostgreSQL is able to vacuum 10k-20k dead_tuples
+ # per minute. So this should give us enough space.
+ #
+ # More information about the updates in `PopulateMergeRequestMetricsWithEventsDataImproved` class.
+ #
+ MergeRequest.all.each_batch(of: BATCH_SIZE) do |relation, index|
+ range = relation.pluck('MIN(id)', 'MAX(id)').first
+
+ BackgroundMigrationWorker.perform_in(index * 8.minutes, MIGRATION, range)
+ end
+ end
+
+ def down
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index ff2dde3243c..e5e19eb7745 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 20181203002526) do
+ActiveRecord::Schema.define(version: 20181204154019) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
diff --git a/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved.rb b/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved.rb
new file mode 100644
index 00000000000..37592d67dd9
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved.rb
@@ -0,0 +1,99 @@
+# frozen_string_literal: true
+# rubocop:disable Style/Documentation
+
+module Gitlab
+ module BackgroundMigration
+ class PopulateMergeRequestMetricsWithEventsDataImproved
+ CLOSED_EVENT_ACTION = 3
+ MERGED_EVENT_ACTION = 7
+
+ def perform(min_merge_request_id, max_merge_request_id)
+ insert_metrics_for_range(min_merge_request_id, max_merge_request_id)
+ update_metrics_with_events_data(min_merge_request_id, max_merge_request_id)
+ end
+
+ # Inserts merge_request_metrics records for merge_requests without it for
+ # a given merge request batch.
+ def insert_metrics_for_range(min, max)
+ metrics_not_exists_clause =
+ <<-SQL.strip_heredoc
+ NOT EXISTS (SELECT 1 FROM merge_request_metrics
+ WHERE merge_request_metrics.merge_request_id = merge_requests.id)
+ SQL
+
+ MergeRequest.where(metrics_not_exists_clause).where(id: min..max).each_batch do |batch|
+ select_sql = batch.select(:id, :created_at, :updated_at).to_sql
+
+ execute("INSERT INTO merge_request_metrics (merge_request_id, created_at, updated_at) #{select_sql}")
+ end
+ end
+
+ def update_metrics_with_events_data(min, max)
+ if Gitlab::Database.postgresql?
+ psql_update_metrics_with_events_data(min, max)
+ else
+ mysql_update_metrics_with_events_data(min, max)
+ end
+ end
+
+ def psql_update_metrics_with_events_data(min, max)
+ update_sql = <<-SQL.strip_heredoc
+ UPDATE merge_request_metrics
+ SET (latest_closed_at,
+ latest_closed_by_id) =
+ ( SELECT updated_at,
+ author_id
+ FROM events
+ WHERE target_id = merge_request_id
+ AND target_type = 'MergeRequest'
+ AND action = #{CLOSED_EVENT_ACTION}
+ ORDER BY id DESC
+ LIMIT 1 ),
+ merged_by_id =
+ ( SELECT author_id
+ FROM events
+ WHERE target_id = merge_request_id
+ AND target_type = 'MergeRequest'
+ AND action = #{MERGED_EVENT_ACTION}
+ ORDER BY id DESC
+ LIMIT 1 )
+ WHERE merge_request_id BETWEEN #{min} AND #{max}
+ SQL
+
+ execute(update_sql)
+ end
+
+ def mysql_update_metrics_with_events_data(min, max)
+ closed_updated_at_subquery = mysql_events_select(:updated_at, CLOSED_EVENT_ACTION)
+ closed_author_id_subquery = mysql_events_select(:author_id, CLOSED_EVENT_ACTION)
+ merged_author_id_subquery = mysql_events_select(:author_id, MERGED_EVENT_ACTION)
+
+ update_sql = <<-SQL.strip_heredoc
+ UPDATE merge_request_metrics
+ SET latest_closed_at = (#{closed_updated_at_subquery}),
+ latest_closed_by_id = (#{closed_author_id_subquery}),
+ merged_by_id = (#{merged_author_id_subquery})
+ WHERE merge_request_id BETWEEN #{min} AND #{max}
+ SQL
+
+ execute(update_sql)
+ end
+
+ def mysql_events_select(column, action)
+ <<-SQL.strip_heredoc
+ SELECT #{column} FROM events
+ WHERE target_id = merge_request_id
+ AND target_type = 'MergeRequest'
+ AND action = #{action}
+ ORDER BY id DESC
+ LIMIT 1
+ SQL
+ end
+
+ def execute(sql)
+ @connection ||= ActiveRecord::Base.connection
+ @connection.execute(sql)
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved_spec.rb b/spec/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved_spec.rb
new file mode 100644
index 00000000000..d1d64574627
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved_spec.rb
@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+
+describe Gitlab::BackgroundMigration::PopulateMergeRequestMetricsWithEventsDataImproved, :migration, schema: 20181204154019 do
+ let(:namespaces) { table(:namespaces) }
+ let(:projects) { table(:projects) }
+ let(:users) { table(:users) }
+ let(:events) { table(:events) }
+
+ let(:user) { users.create!(email: 'test@example.com', projects_limit: 100, username: 'test') }
+
+ let(:namespace) { namespaces.create(name: 'gitlab', path: 'gitlab-org') }
+ let(:project) { projects.create(namespace_id: namespace.id, name: 'foo') }
+ let(:merge_requests) { table(:merge_requests) }
+
+ def create_merge_request(id, params = {})
+ params.merge!(id: id,
+ target_project_id: project.id,
+ target_branch: 'master',
+ source_project_id: project.id,
+ source_branch: 'mr name',
+ title: "mr name#{id}")
+
+ merge_requests.create(params)
+ end
+
+ def create_merge_request_event(id, params = {})
+ params.merge!(id: id,
+ project_id: project.id,
+ author_id: user.id,
+ target_type: 'MergeRequest')
+
+ events.create(params)
+ end
+
+ describe '#perform' do
+ it 'creates and updates closed and merged events' do
+ timestamp = Time.new('2018-01-01 12:00:00').utc
+
+ create_merge_request(1)
+ create_merge_request_event(1, target_id: 1, action: 3, updated_at: timestamp)
+ create_merge_request_event(2, target_id: 1, action: 3, updated_at: timestamp + 10.seconds)
+
+ create_merge_request_event(3, target_id: 1, action: 7, updated_at: timestamp)
+ create_merge_request_event(4, target_id: 1, action: 7, updated_at: timestamp + 10.seconds)
+
+ subject.perform(1, 1)
+
+ merge_request = MergeRequest.first
+
+ expect(merge_request.metrics).to have_attributes(latest_closed_by_id: user.id,
+ latest_closed_at: timestamp + 10.seconds,
+ merged_by_id: user.id)
+ end
+ end
+end
diff --git a/spec/migrations/populate_mr_metrics_with_events_data_spec.rb b/spec/migrations/populate_mr_metrics_with_events_data_spec.rb
new file mode 100644
index 00000000000..291a52b904d
--- /dev/null
+++ b/spec/migrations/populate_mr_metrics_with_events_data_spec.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require Rails.root.join('db', 'post_migrate', '20181204154019_populate_mr_metrics_with_events_data.rb')
+
+describe PopulateMrMetricsWithEventsData, :migration, :sidekiq do
+ let(:namespaces) { table(:namespaces) }
+ let(:projects) { table(:projects) }
+ let(:namespace) { namespaces.create(name: 'gitlab', path: 'gitlab-org') }
+ let(:project) { projects.create(namespace_id: namespace.id, name: 'foo') }
+ let(:merge_requests) { table(:merge_requests) }
+
+ def create_merge_request(id)
+ params = {
+ id: id,
+ target_project_id: project.id,
+ target_branch: 'master',
+ source_project_id: project.id,
+ source_branch: 'mr name',
+ title: "mr name#{id}"
+ }
+
+ merge_requests.create!(params)
+ end
+
+ it 'correctly schedules background migrations' do
+ create_merge_request(1)
+ create_merge_request(2)
+ create_merge_request(3)
+
+ stub_const("#{described_class.name}::BATCH_SIZE", 2)
+
+ Sidekiq::Testing.fake! do
+ Timecop.freeze do
+ migrate!
+
+ expect(described_class::MIGRATION)
+ .to be_scheduled_delayed_migration(8.minutes, 1, 2)
+
+ expect(described_class::MIGRATION)
+ .to be_scheduled_delayed_migration(16.minutes, 3, 3)
+
+ expect(BackgroundMigrationWorker.jobs.size).to eq(2)
+ end
+ end
+ end
+end