diff options
Diffstat (limited to 'db/post_migrate/20210731132939_backfill_stage_event_hash.rb')
-rw-r--r-- | db/post_migrate/20210731132939_backfill_stage_event_hash.rb | 115 |
1 files changed, 115 insertions, 0 deletions
# frozen_string_literal: true

# Provenance: added as a new file (mode 100644, blob 2c4dc904387) at
# db/post_migrate/20210731132939_backfill_stage_event_hash.rb

# Backfills `stage_event_hash_id` on the group-level and project-level value
# stream stage tables.
#
# For every stage row whose `stage_event_hash_id` is NULL the migration
# computes a SHA-256 digest from the stage's start and end events, makes sure
# a row carrying that digest exists in
# `analytics_cycle_analytics_stage_event_hashes`, and points the stage at it.
# Stage rows whose start or end event identifier is not listed in
# EVENT_ID_IDENTIFIER_MAPPING are deleted. Once both tables are processed a
# NOT NULL constraint is added on `stage_event_hash_id`.
class BackfillStageEventHash < ActiveRecord::Migration[6.1]
  include Gitlab::Database::MigrationHelpers

  # No single wrapping transaction: every batch opens its own transaction in
  # #update_stage_table.
  disable_ddl_transaction!

  # Number of stage rows handled per batch (and per transaction).
  BATCH_SIZE = 100

  # Stored integer event identifier => event name used as digest input.
  EVENT_ID_IDENTIFIER_MAPPING = {
    1 => :issue_created,
    2 => :issue_first_mentioned_in_commit,
    3 => :issue_closed,
    4 => :issue_first_added_to_board,
    5 => :issue_first_associated_with_milestone,
    7 => :issue_last_edited,
    8 => :issue_label_added,
    9 => :issue_label_removed,
    10 => :issue_deployed_to_production,
    100 => :merge_request_created,
    101 => :merge_request_first_deployed_to_production,
    102 => :merge_request_last_build_finished,
    103 => :merge_request_last_build_started,
    104 => :merge_request_merged,
    105 => :merge_request_closed,
    106 => :merge_request_last_edited,
    107 => :merge_request_label_added,
    108 => :merge_request_label_removed,
    109 => :merge_request_first_commit_at,
    1000 => :code_stage_start,
    1001 => :issue_stage_end,
    1002 => :plan_stage_start
  }.freeze

  # Event identifiers whose digest input additionally contains the label id.
  LABEL_BASED_EVENTS = Set.new([8, 9, 107, 108]).freeze

  # Migration-local model for the group-level stage table.
  class GroupStage < ActiveRecord::Base
    include EachBatch

    self.table_name = 'analytics_cycle_analytics_group_stages'
  end

  # Migration-local model for the project-level stage table.
  class ProjectStage < ActiveRecord::Base
    include EachBatch

    self.table_name = 'analytics_cycle_analytics_project_stages'
  end

  # Migration-local model for the table holding one row per distinct digest.
  class StageEventHash < ActiveRecord::Base
    self.table_name = 'analytics_cycle_analytics_stage_event_hashes'
  end

  # Backfills both stage tables, then forbids NULL in `stage_event_hash_id`.
  def up
    GroupStage.reset_column_information
    ProjectStage.reset_column_information
    StageEventHash.reset_column_information

    update_stage_table(GroupStage)
    update_stage_table(ProjectStage)

    add_not_null_constraint :analytics_cycle_analytics_group_stages, :stage_event_hash_id
    add_not_null_constraint :analytics_cycle_analytics_project_stages, :stage_event_hash_id
  end

  # Only drops the NOT NULL constraints; backfilled values and deleted rows
  # are not reverted.
  def down
    remove_not_null_constraint :analytics_cycle_analytics_group_stages, :stage_event_hash_id
    remove_not_null_constraint :analytics_cycle_analytics_project_stages, :stage_event_hash_id
  end

  private

  # Walks +klass+ in batches of BATCH_SIZE and fills in `stage_event_hash_id`
  # for every row that does not have one yet.
  #
  # @param klass [Class] GroupStage or ProjectStage
  def update_stage_table(klass)
    klass.each_batch(of: BATCH_SIZE) do |relation|
      klass.transaction do
        # prevent concurrent modification (unlikely to happen)
        records = relation.where(stage_event_hash_id: nil).lock!.to_a
        records = delete_invalid_records(records)
        next if records.empty?

        hashes_by_stage = records.to_h { |stage| [stage, calculate_stage_events_hash(stage)] }
        hashes = hashes_by_stage.values.uniq

        # NOTE(review): presumably relies on a unique index on hash_sha256 so
        # that already-present digests are skipped rather than duplicated —
        # the schema is not visible here, confirm.
        StageEventHash.insert_all(hashes.map { |hash| { hash_sha256: hash } })

        # Re-read so that both freshly inserted and pre-existing rows are found.
        stage_event_hashes_by_hash = StageEventHash.where(hash_sha256: hashes).index_by(&:hash_sha256)
        records.each do |stage|
          stage.update!(stage_event_hash_id: stage_event_hashes_by_hash[hashes_by_stage[stage]].id)
        end
      end
    end
  end

  # Digest identifying a stage's (start event, end event) combination.
  #
  # @param stage [GroupStage, ProjectStage]
  # @return [String] hex SHA-256 of "<start event digest>-<end event digest>"
  def calculate_stage_events_hash(stage)
    start_event_hash = calculate_event_hash(stage.start_event_identifier, stage.start_event_label_id)
    end_event_hash = calculate_event_hash(stage.end_event_identifier, stage.end_event_label_id)

    Digest::SHA256.hexdigest("#{start_event_hash}-#{end_event_hash}")
  end

  # Digest of a single event: the event name, followed by "-<label_id>" for
  # label based events.
  #
  # @param event_identifier [Integer] key of EVENT_ID_IDENTIFIER_MAPPING
  # @param label_id [Integer, nil] only used for LABEL_BASED_EVENTS
  # @return [String] hex SHA-256 digest
  # @raise [KeyError] when +event_identifier+ is not in the mapping
  def calculate_event_hash(event_identifier, label_id = nil)
    event_name = EVENT_ID_IDENTIFIER_MAPPING.fetch(event_identifier)

    # Built via interpolation instead of appending to the Symbol#to_s result,
    # so no string obtained from a Symbol is mutated; the bytes are the same.
    str = LABEL_BASED_EVENTS.include?(event_identifier) ? "#{event_name}-#{label_id}" : event_name.to_s

    Digest::SHA256.hexdigest(str)
  end

  # Invalid records are safe to delete, since they are not working properly anyway
  #
  # A record is invalid when its start or end event identifier is missing from
  # EVENT_ID_IDENTIFIER_MAPPING.
  #
  # @param records [Array<GroupStage, ProjectStage>]
  # @return [Array<GroupStage, ProjectStage>] the records that were kept
  def delete_invalid_records(records)
    to_be_deleted, valid_records = records.partition do |record|
      !EVENT_ID_IDENTIFIER_MAPPING.key?(record.start_event_identifier) ||
        !EVENT_ID_IDENTIFIER_MAPPING.key?(record.end_event_identifier)
    end

    to_be_deleted.each(&:delete)
    valid_records
  end
end