diff options
Diffstat (limited to 'lib/gitlab/database/consistency_checker.rb')
-rw-r--r-- | lib/gitlab/database/consistency_checker.rb | 122 |
1 files changed, 122 insertions, 0 deletions
diff --git a/lib/gitlab/database/consistency_checker.rb b/lib/gitlab/database/consistency_checker.rb new file mode 100644 index 00000000000..e398fef744c --- /dev/null +++ b/lib/gitlab/database/consistency_checker.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true + +module Gitlab + module Database + class ConsistencyChecker + BATCH_SIZE = 1000 + MAX_BATCHES = 25 + MAX_RUNTIME = 30.seconds # must be less than the scheduling frequency of the ConsistencyCheck jobs + + delegate :monotonic_time, to: :'Gitlab::Metrics::System' + + def initialize(source_model:, target_model:, source_columns:, target_columns:) + @source_model = source_model + @target_model = target_model + @source_columns = source_columns + @target_columns = target_columns + @source_sort_column = source_columns.first + @target_sort_column = target_columns.first + @result = { matches: 0, mismatches: 0, batches: 0, mismatches_details: [] } + end + + # rubocop:disable Metrics/AbcSize + def execute(start_id:) + current_start_id = start_id + + return build_result(next_start_id: nil) if max_id.nil? + return build_result(next_start_id: min_id) if current_start_id > max_id + + @start_time = monotonic_time + + MAX_BATCHES.times do + if (current_start_id <= max_id) && !over_time_limit? + ids_range = current_start_id...(current_start_id + BATCH_SIZE) + # rubocop: disable CodeReuse/ActiveRecord + source_data = source_model.where(source_sort_column => ids_range) + .order(source_sort_column => :asc).pluck(*source_columns) + target_data = target_model.where(target_sort_column => ids_range) + .order(target_sort_column => :asc).pluck(*target_columns) + # rubocop: enable CodeReuse/ActiveRecord + + current_start_id += BATCH_SIZE + result[:matches] += append_mismatches_details(source_data, target_data) + result[:batches] += 1 + else + break + end + end + + result[:mismatches] = result[:mismatches_details].length + metrics_counter.increment({ source_table: source_model.table_name, result: "match" }, result[:matches]) + metrics_counter.increment({ source_table: source_model.table_name, result: "mismatch" }, result[:mismatches]) + + build_result(next_start_id: current_start_id > max_id ? min_id : current_start_id) + end + # rubocop:enable Metrics/AbcSize + + private + + attr_reader :source_model, :target_model, :source_columns, :target_columns, + :source_sort_column, :target_sort_column, :start_time, :result + + def build_result(next_start_id:) + { next_start_id: next_start_id }.merge(result) + end + + def over_time_limit? + (monotonic_time - start_time) >= MAX_RUNTIME + end + + # This where comparing the items happen, and building the diff log + # It returns the number of matching elements + def append_mismatches_details(source_data, target_data) + # Mapping difference the sort key to the item values + # source - target + source_diff_hash = (source_data - target_data).index_by { |item| item.shift } + # target - source + target_diff_hash = (target_data - source_data).index_by { |item| item.shift } + + matches = source_data.length - source_diff_hash.length + + # Items that exist in the first table + Different items + source_diff_hash.each do |id, values| + result[:mismatches_details] << { + id: id, + source_table: values, + target_table: target_diff_hash[id] + } + end + + # Only the items that exist in the target table + target_diff_hash.each do |id, values| + next if source_diff_hash[id] # It's already added + + result[:mismatches_details] << { + id: id, + source_table: source_diff_hash[id], + target_table: values + } + end + + matches + end + + # rubocop: disable CodeReuse/ActiveRecord + def min_id + @min_id ||= source_model.minimum(source_sort_column) + end + + def max_id + @max_id ||= source_model.maximum(source_sort_column) + end + # rubocop: enable CodeReuse/ActiveRecord + + def metrics_counter + @metrics_counter ||= Gitlab::Metrics.counter( + :consistency_checks, + "Consistency Check Results" + ) + end + end + end +end |