app/models/internal_id.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343

# frozen_string_literal: true

# An InternalId is a strictly monotone sequence of integers
# generated for a given scope and usage.
#
# The monotone sequence may be broken if an ID is explicitly provided
# to `.track_greatest_and_save!` or `#track_greatest`.
#
# For example, issues use their project to scope internal ids:
# In that sense, scope is "project" and usage is "issues".
# Generated internal ids for an issue are unique per project.
#
# See InternalId#usage enum for available usages.
#
# In order to leverage InternalId for other usages, the idea is to
# * Add `usage` value to enum
# * (Optionally) add columns to `internal_ids` if needed for scope.
class InternalId < ApplicationRecord
  extend Gitlab::Utils::StrongMemoize

  belongs_to :project
  belongs_to :namespace

  enum usage: Enums::InternalId.usage_resources

  validates :usage, presence: true

  scope :filter_by, -> (scope, usage) do
    where(**scope, usage: usage)
  end

  # Increments #last_value and saves the record
  #
  # The operation locks the record and gathers a `ROW SHARE` lock (in PostgreSQL).
  # As such, the increment is atomic and safe to be called concurrently.
  def increment_and_save!
    update_and_save { self.last_value = (last_value || 0) + 1 }
  end

  # Increments #last_value with new_value if it is greater than the current,
  # and saves the record
  #
  # The operation locks the record and gathers a `ROW SHARE` lock (in PostgreSQL).
  # As such, the increment is atomic and safe to be called concurrently.
  def track_greatest_and_save!(new_value)
    update_and_save { self.last_value = [last_value || 0, new_value].max }
  end

  private

  def update_and_save(&block)
    lock!
    yield
    save!
    last_value
  end

  class << self
    def track_greatest(subject, scope, usage, new_value, init)
      build_generator(subject, scope, usage, init).track_greatest(new_value)
    end

    def generate_next(subject, scope, usage, init)
      build_generator(subject, scope, usage, init).generate
    end

    def reset(subject, scope, usage, value)
      build_generator(subject, scope, usage).reset(value)
    end

    # Flushing records is generally safe in a sense that those
    # records are going to be re-created when needed.
    #
    # A filter condition has to be provided to not accidentally flush
    # records for all projects.
    def flush_records!(filter)
      raise ArgumentError, "filter cannot be empty" if filter.blank?

      where(filter).delete_all
    end

    def internal_id_transactions_increment(operation:, usage:)
      self.internal_id_transactions_total.increment(
        operation: operation,
        usage: usage.to_s,
        in_transaction: ActiveRecord::Base.connection.transaction_open?.to_s # rubocop: disable Database/MultipleDatabases
      )
    end

    def internal_id_transactions_total
      strong_memoize(:internal_id_transactions_total) do
        name = :gitlab_internal_id_transactions_total
        comment = 'Counts all the internal ids happening within transaction'

        Gitlab::Metrics.counter(name, comment)
      end
    end

    private

    def build_generator(subject, scope, usage, init = nil)
      if Feature.enabled?(:generate_iids_without_explicit_locking)
        ImplicitlyLockingInternalIdGenerator.new(subject, scope, usage, init)
      else
        InternalIdGenerator.new(subject, scope, usage, init)
      end
    end
  end

  class InternalIdGenerator
    # Generate next internal id for a given scope and usage.
    #
    # For currently supported usages, see #usage enum.
    #
    # The method implements a locking scheme that has the following properties:
    # 1) Generated sequence of internal ids is unique per (scope and usage)
    # 2) The method is thread-safe and may be used in concurrent threads/processes.
    # 3) The generated sequence is gapless.
    # 4) In the absence of a record in the internal_ids table, one will be created
    #    and last_value will be calculated on the fly.
    #
    # subject: The instance or class we're generating an internal id for.
    # scope: Attributes that define the scope for id generation.
    #        Valid keys are `project/project_id` and `namespace/namespace_id`.
    # usage: Symbol to define the usage of the internal id, see InternalId.usages
    # init: Proc that accepts the subject and the scope and returns Integer|NilClass
    attr_reader :subject, :scope, :scope_attrs, :usage, :init

    def initialize(subject, scope, usage, init = nil)
      @subject = subject
      @scope = scope
      @usage = usage
      @init = init

      raise ArgumentError, 'Scope is not well-defined, need at least one column for scope (given: 0)' if scope.empty?

      unless InternalId.usages.has_key?(usage.to_s)
        raise ArgumentError, "Usage '#{usage}' is unknown. Supported values are #{InternalId.usages.keys} from InternalId.usages"
      end
    end

    # Generates next internal id and returns it
    # init: Block that gets called to initialize InternalId record if not present
    #       Make sure to not throw exceptions in the absence of records (if this is expected).
    def generate
      InternalId.internal_id_transactions_increment(operation: :generate, usage: usage)

      subject.transaction do
        # Create a record in internal_ids if one does not yet exist
        # and increment its last value
        #
        # Note this will acquire a ROW SHARE lock on the InternalId record
        record.increment_and_save!
      end
    end

    # Reset tries to rewind to `value-1`. This will only succeed,
    # if `value` stored in database is equal to `last_value`.
    # value: The expected last_value to decrement
    def reset(value)
      return false unless value

      InternalId.internal_id_transactions_increment(operation: :reset, usage: usage)

      updated =
        InternalId
          .where(**scope, usage: usage_value)
          .where(last_value: value)
          .update_all('last_value = last_value - 1')

      updated > 0
    end

    # Create a record in internal_ids if one does not yet exist
    # and set its new_value if it is higher than the current last_value
    #
    # Note this will acquire a ROW SHARE lock on the InternalId record

    def track_greatest(new_value)
      InternalId.internal_id_transactions_increment(operation: :track_greatest, usage: usage)

      subject.transaction do
        record.track_greatest_and_save!(new_value)
      end
    end

    def record
      @record ||= (lookup || create_record)
    end

    def with_lock(&block)
      InternalId.internal_id_transactions_increment(operation: :with_lock, usage: usage)

      record.with_lock(&block)
    end

    private

    # Retrieve InternalId record for (project, usage) combination, if it exists
    def lookup
      InternalId.find_by(**scope, usage: usage_value)
    end

    def usage_value
      @usage_value ||= InternalId.usages[usage.to_s]
    end

    # Create InternalId record for (scope, usage) combination, if it doesn't exist
    #
    # We blindly insert without synchronization. If another process
    # was faster in doing this, we'll realize once we hit the unique key constraint
    # violation. We can safely roll-back the nested transaction and perform
    # a lookup instead to retrieve the record.
    def create_record
      raise ArgumentError, 'Cannot initialize without init!' unless init

      instance = subject.is_a?(::Class) ? nil : subject

      subject.transaction(requires_new: true) do
        InternalId.create!(
          **scope,
          usage: usage_value,
          last_value: init.call(instance, scope) || 0
        )
      end
    rescue ActiveRecord::RecordNotUnique
      lookup
    end
  end

  class ImplicitlyLockingInternalIdGenerator
    # Generate next internal id for a given scope and usage.
    #
    # For currently supported usages, see #usage enum.
    #
    # The method implements a locking scheme that has the following properties:
    # 1) Generated sequence of internal ids is unique per (scope and usage)
    # 2) The method is thread-safe and may be used in concurrent threads/processes.
    # 3) The generated sequence is gapless.
    # 4) In the absence of a record in the internal_ids table, one will be created
    #    and last_value will be calculated on the fly.
    #
    # subject: The instance or class we're generating an internal id for.
    # scope: Attributes that define the scope for id generation.
    #        Valid keys are `project/project_id` and `namespace/namespace_id`.
    # usage: Symbol to define the usage of the internal id, see InternalId.usages
    # init: Proc that accepts the subject and the scope and returns Integer|NilClass
    attr_reader :subject, :scope, :scope_attrs, :usage, :init

    def initialize(subject, scope, usage, init = nil)
      @subject = subject
      @scope = scope
      @usage = usage
      @init = init

      raise ArgumentError, 'Scope is not well-defined, need at least one column for scope (given: 0)' if scope.empty?

      unless InternalId.usages.has_key?(usage.to_s)
        raise ArgumentError, "Usage '#{usage}' is unknown. Supported values are #{InternalId.usages.keys} from InternalId.usages"
      end
    end

    # Generates next internal id and returns it
    # init: Block that gets called to initialize InternalId record if not present
    #       Make sure to not throw exceptions in the absence of records (if this is expected).
    def generate
      InternalId.internal_id_transactions_increment(operation: :generate, usage: usage)

      next_iid = update_record!(subject, scope, usage, arel_table[:last_value] + 1)

      return next_iid if next_iid

      create_record!(subject, scope, usage, init) do |iid|
        iid.last_value += 1
      end
    rescue ActiveRecord::RecordNotUnique
      retry
    end

    # Reset tries to rewind to `value-1`. This will only succeed,
    # if `value` stored in database is equal to `last_value`.
    # value: The expected last_value to decrement
    def reset(value)
      return false unless value

      InternalId.internal_id_transactions_increment(operation: :reset, usage: usage)

      iid = update_record!(subject, scope.merge(last_value: value), usage, arel_table[:last_value] - 1)
      iid == value - 1
    end

    # Create a record in internal_ids if one does not yet exist
    # and set its new_value if it is higher than the current last_value
    def track_greatest(new_value)
      InternalId.internal_id_transactions_increment(operation: :track_greatest, usage: usage)

      function = Arel::Nodes::NamedFunction.new('GREATEST', [
        arel_table[:last_value],
        new_value.to_i
      ])

      next_iid = update_record!(subject, scope, usage, function)
      return next_iid if next_iid

      create_record!(subject, scope, usage, init) do |object|
        object.last_value = [object.last_value, new_value].max
      end
    rescue ActiveRecord::RecordNotUnique
      retry
    end

    private

    def update_record!(subject, scope, usage, new_value)
      stmt = Arel::UpdateManager.new
      stmt.table(arel_table)
      stmt.set(arel_table[:last_value] => new_value)
      stmt.wheres = InternalId.filter_by(scope, usage).arel.constraints

      ActiveRecord::Base.connection.insert(stmt, 'Update InternalId', 'last_value') # rubocop: disable Database/MultipleDatabases
    end

    def create_record!(subject, scope, usage, init)
      raise ArgumentError, 'Cannot initialize without init!' unless init

      instance = subject.is_a?(::Class) ? nil : subject

      subject.transaction(requires_new: true) do
        last_value = init.call(instance, scope) || 0

        internal_id = InternalId.create!(**scope, usage: usage, last_value: last_value) do |subject|
          yield subject if block_given?
        end

        internal_id.last_value
      end
    end

    def arel_table
      InternalId.arel_table
    end
  end
end