summaryrefslogtreecommitdiff
path: root/app/services/database/consistency_check_service.rb
diff options
context:
space:
mode:
Diffstat (limited to 'app/services/database/consistency_check_service.rb')
-rw-r--r--app/services/database/consistency_check_service.rb109
1 files changed, 109 insertions, 0 deletions
diff --git a/app/services/database/consistency_check_service.rb b/app/services/database/consistency_check_service.rb
new file mode 100644
index 00000000000..e39bc8f25b8
--- /dev/null
+++ b/app/services/database/consistency_check_service.rb
@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+
+module Database
+ class ConsistencyCheckService
+ CURSOR_REDIS_KEY_TTL = 7.days
+ EMPTY_RESULT = { matches: 0, mismatches: 0, batches: 0, mismatches_details: [] }.freeze
+
+ def initialize(source_model:, target_model:, source_columns:, target_columns:)
+ @source_model = source_model
+ @target_model = target_model
+ @source_columns = source_columns
+ @target_columns = target_columns
+ @source_sort_column = source_columns.first
+ @target_sort_column = target_columns.first
+ end
+
+ # This class takes two ActiveRecord models, and compares the selected columns
+ # of the two models tables, for the purposes of checking the consistency of
+ # mirroring of tables. For example Namespace and Ci::NamepaceMirror
+ #
+ # It compares up to 25 batches (1000 records / batch), or up to 30 seconds
+ # for all the batches in total.
+ #
+ # It saves the cursor of the next start_id (cusror) in Redis. If the start_id
+ # wasn't saved in Redis, for example, in the first run, it will choose some random start_id
+ #
+ # Example:
+ # service = Database::ConsistencyCheckService.new(
+ # source_model: Namespace,
+ # target_model: Ci::NamespaceMirror,
+ # source_columns: %w[id traversal_ids],
+ # target_columns: %w[namespace_id traversal_ids],
+ # )
+ # result = service.execute
+ #
+ # result is a hash that has the following fields:
+ # - batches: Number of batches checked
+ # - matches: The number of matched records
+ # - mismatches: The number of mismatched records
+ # - mismatches_details: It's an array that contains details about the mismatched records.
+ # each record in this array is a hash of format {id: ID, source_table: [...], target_table: [...]}
+ # Each record represents the attributes of the records in the two tables.
+ # - start_id: The start id cursor of the current batch. <nil> means no records.
+ # - next_start_id: The ID that can be used for the next batch iteration check. <nil> means no records
+ def execute
+ start_id = next_start_id
+
+ return EMPTY_RESULT if start_id.nil?
+
+ result = consistency_checker.execute(start_id: start_id)
+ result[:start_id] = start_id
+
+ save_next_start_id(result[:next_start_id])
+
+ result
+ end
+
+ private
+
+ attr_reader :source_model, :target_model, :source_columns, :target_columns, :source_sort_column, :target_sort_column
+
+ def consistency_checker
+ @consistency_checker ||= Gitlab::Database::ConsistencyChecker.new(
+ source_model: source_model,
+ target_model: target_model,
+ source_columns: source_columns,
+ target_columns: target_columns
+ )
+ end
+
+ def next_start_id
+ return if min_id.nil?
+
+ fetch_next_start_id || random_start_id
+ end
+
+ # rubocop: disable CodeReuse/ActiveRecord
+ def min_id
+ @min_id ||= source_model.minimum(source_sort_column)
+ end
+
+ def max_id
+ @max_id ||= source_model.minimum(source_sort_column)
+ end
+ # rubocop: enable CodeReuse/ActiveRecord
+
+ def fetch_next_start_id
+ Gitlab::Redis::SharedState.with { |redis| redis.get(cursor_redis_shared_state_key)&.to_i }
+ end
+
+ # This returns some random start_id, so that we don't always start checking
+ # from the start of the table, in case we lose the cursor in Redis.
+ def random_start_id
+ range_start = min_id
+ range_end = [min_id, max_id - Gitlab::Database::ConsistencyChecker::BATCH_SIZE].max
+ rand(range_start..range_end)
+ end
+
+ def save_next_start_id(start_id)
+ Gitlab::Redis::SharedState.with do |redis|
+ redis.set(cursor_redis_shared_state_key, start_id, ex: CURSOR_REDIS_KEY_TTL)
+ end
+ end
+
+ def cursor_redis_shared_state_key
+ "consistency_check_cursor:#{source_model.table_name}:#{target_model.table_name}"
+ end
+ end
+end