summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: GitLab Bot <gitlab-bot@gitlab.com> 2020-02-13 18:09:00 +0000
committer: GitLab Bot <gitlab-bot@gitlab.com> 2020-02-13 18:09:00 +0000
commit: e4dffdfe364af6c72dcb6b4671cb39a24e8e100c (patch)
tree: 6428a1c3472b14396645dcb280b219dbc0420c66 /lib
parent: 0ab47b994caa80c5587f33dc818626b66cfdafe2 (diff)
download: gitlab-ce-e4dffdfe364af6c72dcb6b4671cb39a24e8e100c.tar.gz
Add latest changes from gitlab-org/gitlab@master
Diffstat (limited to 'lib')
-rw-r--r-- lib/gitlab/looping_batcher.rb 99
1 files changed, 99 insertions, 0 deletions
diff --git a/lib/gitlab/looping_batcher.rb b/lib/gitlab/looping_batcher.rb
new file mode 100644
index 00000000000..adf0aeda506
--- /dev/null
+++ b/lib/gitlab/looping_batcher.rb
@@ -0,0 +1,99 @@
+# frozen_string_literal: true
+
+module Gitlab
+ # Returns an ID range within a table so it can be iterated over. Repeats from
+ # the beginning after it reaches the end.
+ #
+ # Used by Geo in particular to iterate over a replicable and its registry
+ # table.
+ #
+ # Tracks a cursor for each table, by "key". If the table is smaller than
+ # batch_size, then a range for the whole table is returned on every call.
+ class LoopingBatcher
+ # @param [Class] model_class the class of the table to iterate on
+ # @param [String] key to identify the cursor. Note, cursor is already unique
+ # per table.
+ # @param [Integer] batch_size to limit the number of records in a batch
+ def initialize(model_class, key:, batch_size: 1000)
+ @model_class = model_class
+ @key = key
+ @batch_size = batch_size
+ end
+
+ # @return [Range] a range of IDs. `nil` if 0 records at or after the cursor.
+ def next_range!
+ return unless @model_class.any?
+
+ batch_first_id = cursor_id
+
+ batch_last_id = get_batch_last_id(batch_first_id)
+ return unless batch_last_id
+
+ batch_first_id..batch_last_id
+ end
+
+ private
+
+ # @private
+ #
+ # Get the last ID of the batch. Increment the cursor or reset it if at end.
+ #
+ # @param [Integer] batch_first_id the first ID of the batch
+ # @return [Integer] batch_last_id the last ID of the batch (not the table)
+ def get_batch_last_id(batch_first_id)
+ batch_last_id, more_rows = run_query(@model_class.table_name, @model_class.primary_key, batch_first_id, @batch_size)
+
+ if more_rows
+ increment_batch(batch_last_id)
+ else
+ reset if batch_first_id > 1
+ end
+
+ batch_last_id
+ end
+
+ def run_query(table, primary_key, batch_first_id, batch_size)
+ sql = <<~SQL
+ SELECT MAX(batch.id) AS batch_last_id,
+ EXISTS (
+ SELECT #{primary_key}
+ FROM #{table}
+ WHERE #{primary_key} > MAX(batch.id)
+ ) AS more_rows
+ FROM (
+ SELECT #{primary_key}
+ FROM #{table}
+ WHERE #{primary_key} >= #{batch_first_id}
+ ORDER BY #{primary_key}
+ LIMIT #{batch_size}) AS batch;
+ SQL
+
+ result = ActiveRecord::Base.connection.exec_query(sql).first
+
+ [result["batch_last_id"], result["more_rows"]]
+ end
+
+ def reset
+ set_cursor_id(1)
+ end
+
+ def increment_batch(batch_last_id)
+ set_cursor_id(batch_last_id + 1)
+ end
+
+ # @private
+ #
+ # @return [Integer] the cursor ID, or 1 if it is not set
+ def cursor_id
+ Rails.cache.fetch("#{cache_key}:cursor_id") || 1
+ end
+
+ def set_cursor_id(id)
+ Rails.cache.write("#{cache_key}:cursor_id", id)
+ end
+
+ def cache_key
+ @cache_key ||= "#{self.class.name.parameterize}:#{@model_class.name.parameterize}:#{@key}:cursor_id"
+ end
+ end
+end