diff options
6 files changed, 247 insertions, 1 deletions
diff --git a/changelogs/unreleased/osw-update-mr-metrics-with-events-data.yml b/changelogs/unreleased/osw-update-mr-metrics-with-events-data.yml new file mode 100644 index 00000000000..09a10a86adc --- /dev/null +++ b/changelogs/unreleased/osw-update-mr-metrics-with-events-data.yml @@ -0,0 +1,5 @@ +--- +title: Populate MR metrics with events table information (migration) +merge_request: 23564 +author: +type: performance diff --git a/db/post_migrate/20181204154019_populate_mr_metrics_with_events_data.rb b/db/post_migrate/20181204154019_populate_mr_metrics_with_events_data.rb new file mode 100644 index 00000000000..1e43e3dd790 --- /dev/null +++ b/db/post_migrate/20181204154019_populate_mr_metrics_with_events_data.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class PopulateMrMetricsWithEventsData < ActiveRecord::Migration[4.2] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + BATCH_SIZE = 10_000 + MIGRATION = 'PopulateMergeRequestMetricsWithEventsDataImproved' + PREVIOUS_MIGRATION = 'PopulateMergeRequestMetricsWithEventsData' + + disable_ddl_transaction! + + def up + # Perform any ongoing background migration that might still be running from + # previous try (see https://gitlab.com/gitlab-org/gitlab-ce/issues/47676). + Gitlab::BackgroundMigration.steal(PREVIOUS_MIGRATION) + + say 'Scheduling `PopulateMergeRequestMetricsWithEventsData` jobs' + # It will update around 4_000_000 records in batches of 10_000 merge + # requests (running between 5 minutes) and should take around 53 hours to complete. + # Apparently, production PostgreSQL is able to vacuum 10k-20k dead_tuples + # per minute. So this should give us enough space. + # + # More information about the updates in `PopulateMergeRequestMetricsWithEventsDataImproved` class. + # + MergeRequest.all.each_batch(of: BATCH_SIZE) do |relation, index| + range = relation.pluck('MIN(id)', 'MAX(id)').first + + BackgroundMigrationWorker.perform_in(index * 8.minutes, MIGRATION, range) + end + end + + def down + end +end diff --git a/db/schema.rb b/db/schema.rb index ff2dde3243c..e5e19eb7745 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20181203002526) do +ActiveRecord::Schema.define(version: 20181204154019) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" diff --git a/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved.rb b/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved.rb new file mode 100644 index 00000000000..37592d67dd9 --- /dev/null +++ b/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved.rb @@ -0,0 +1,99 @@ +# frozen_string_literal: true +# rubocop:disable Style/Documentation + +module Gitlab + module BackgroundMigration + class PopulateMergeRequestMetricsWithEventsDataImproved + CLOSED_EVENT_ACTION = 3 + MERGED_EVENT_ACTION = 7 + + def perform(min_merge_request_id, max_merge_request_id) + insert_metrics_for_range(min_merge_request_id, max_merge_request_id) + update_metrics_with_events_data(min_merge_request_id, max_merge_request_id) + end + + # Inserts merge_request_metrics records for merge_requests without it for + # a given merge request batch. + def insert_metrics_for_range(min, max) + metrics_not_exists_clause = + <<-SQL.strip_heredoc + NOT EXISTS (SELECT 1 FROM merge_request_metrics + WHERE merge_request_metrics.merge_request_id = merge_requests.id) + SQL + + MergeRequest.where(metrics_not_exists_clause).where(id: min..max).each_batch do |batch| + select_sql = batch.select(:id, :created_at, :updated_at).to_sql + + execute("INSERT INTO merge_request_metrics (merge_request_id, created_at, updated_at) #{select_sql}") + end + end + + def update_metrics_with_events_data(min, max) + if Gitlab::Database.postgresql? + psql_update_metrics_with_events_data(min, max) + else + mysql_update_metrics_with_events_data(min, max) + end + end + + def psql_update_metrics_with_events_data(min, max) + update_sql = <<-SQL.strip_heredoc + UPDATE merge_request_metrics + SET (latest_closed_at, + latest_closed_by_id) = + ( SELECT updated_at, + author_id + FROM events + WHERE target_id = merge_request_id + AND target_type = 'MergeRequest' + AND action = #{CLOSED_EVENT_ACTION} + ORDER BY id DESC + LIMIT 1 ), + merged_by_id = + ( SELECT author_id + FROM events + WHERE target_id = merge_request_id + AND target_type = 'MergeRequest' + AND action = #{MERGED_EVENT_ACTION} + ORDER BY id DESC + LIMIT 1 ) + WHERE merge_request_id BETWEEN #{min} AND #{max} + SQL + + execute(update_sql) + end + + def mysql_update_metrics_with_events_data(min, max) + closed_updated_at_subquery = mysql_events_select(:updated_at, CLOSED_EVENT_ACTION) + closed_author_id_subquery = mysql_events_select(:author_id, CLOSED_EVENT_ACTION) + merged_author_id_subquery = mysql_events_select(:author_id, MERGED_EVENT_ACTION) + + update_sql = <<-SQL.strip_heredoc + UPDATE merge_request_metrics + SET latest_closed_at = (#{closed_updated_at_subquery}), + latest_closed_by_id = (#{closed_author_id_subquery}), + merged_by_id = (#{merged_author_id_subquery}) + WHERE merge_request_id BETWEEN #{min} AND #{max} + SQL + + execute(update_sql) + end + + def mysql_events_select(column, action) + <<-SQL.strip_heredoc + SELECT #{column} FROM events + WHERE target_id = merge_request_id + AND target_type = 'MergeRequest' + AND action = #{action} + ORDER BY id DESC + LIMIT 1 + SQL + end + + def execute(sql) + @connection ||= ActiveRecord::Base.connection + @connection.execute(sql) + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved_spec.rb b/spec/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved_spec.rb new file mode 100644 index 00000000000..d1d64574627 --- /dev/null +++ b/spec/lib/gitlab/background_migration/populate_merge_request_metrics_with_events_data_improved_spec.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe Gitlab::BackgroundMigration::PopulateMergeRequestMetricsWithEventsDataImproved, :migration, schema: 20181204154019 do + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + let(:users) { table(:users) } + let(:events) { table(:events) } + + let(:user) { users.create!(email: 'test@example.com', projects_limit: 100, username: 'test') } + + let(:namespace) { namespaces.create(name: 'gitlab', path: 'gitlab-org') } + let(:project) { projects.create(namespace_id: namespace.id, name: 'foo') } + let(:merge_requests) { table(:merge_requests) } + + def create_merge_request(id, params = {}) + params.merge!(id: id, + target_project_id: project.id, + target_branch: 'master', + source_project_id: project.id, + source_branch: 'mr name', + title: "mr name#{id}") + + merge_requests.create(params) + end + + def create_merge_request_event(id, params = {}) + params.merge!(id: id, + project_id: project.id, + author_id: user.id, + target_type: 'MergeRequest') + + events.create(params) + end + + describe '#perform' do + it 'creates and updates closed and merged events' do + timestamp = Time.new('2018-01-01 12:00:00').utc + + create_merge_request(1) + create_merge_request_event(1, target_id: 1, action: 3, updated_at: timestamp) + create_merge_request_event(2, target_id: 1, action: 3, updated_at: timestamp + 10.seconds) + + create_merge_request_event(3, target_id: 1, action: 7, updated_at: timestamp) + create_merge_request_event(4, target_id: 1, action: 7, updated_at: timestamp + 10.seconds) + + subject.perform(1, 1) + + merge_request = MergeRequest.first + + expect(merge_request.metrics).to have_attributes(latest_closed_by_id: user.id, + latest_closed_at: timestamp + 10.seconds, + merged_by_id: user.id) + end + end +end diff --git a/spec/migrations/populate_mr_metrics_with_events_data_spec.rb b/spec/migrations/populate_mr_metrics_with_events_data_spec.rb new file mode 100644 index 00000000000..291a52b904d --- /dev/null +++ b/spec/migrations/populate_mr_metrics_with_events_data_spec.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20181204154019_populate_mr_metrics_with_events_data.rb') + +describe PopulateMrMetricsWithEventsData, :migration, :sidekiq do + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + let(:namespace) { namespaces.create(name: 'gitlab', path: 'gitlab-org') } + let(:project) { projects.create(namespace_id: namespace.id, name: 'foo') } + let(:merge_requests) { table(:merge_requests) } + + def create_merge_request(id) + params = { + id: id, + target_project_id: project.id, + target_branch: 'master', + source_project_id: project.id, + source_branch: 'mr name', + title: "mr name#{id}" + } + + merge_requests.create!(params) + end + + it 'correctly schedules background migrations' do + create_merge_request(1) + create_merge_request(2) + create_merge_request(3) + + stub_const("#{described_class.name}::BATCH_SIZE", 2) + + Sidekiq::Testing.fake! do + Timecop.freeze do + migrate! + + expect(described_class::MIGRATION) + .to be_scheduled_delayed_migration(8.minutes, 1, 2) + + expect(described_class::MIGRATION) + .to be_scheduled_delayed_migration(16.minutes, 3, 3) + + expect(BackgroundMigrationWorker.jobs.size).to eq(2) + end + end + end +end |