summaryrefslogtreecommitdiff
path: root/app/models/namespaces/traversal/linear.rb
blob: 9006f104c64ea0594b4ed65f3f1a23a2b7d66238 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# frozen_string_literal: true
#
# Query a recursively defined namespace hierarchy using linear methods through
# the traversal_ids attribute.
#
# Namespace is a nested hierarchy of one parent to many children. A search
# using only the parent-child relationships is a slow operation. This process
# was previously optimized using Postgresql recursive common table expressions
# (CTE) with acceptable performance. However, it lead to slower than possible
# performance, and resulted in complicated queries that were difficult to make
# performant.
#
# Instead of searching the hierarchy recursively, we store a `traversal_ids`
# attribute on each node. The `traversal_ids` is an ordered array of Namespace
# IDs that define the traversal path from the root Namespace to the current
# Namespace.
#
# For example, suppose we have the following Namespaces:
#
# GitLab (id: 1) > Engineering (id: 2) > Manage (id: 3) > Access (id: 4)
#
# Then `traversal_ids` for group "Access" is [1, 2, 3, 4]
#
# And we can match against other Namespace `traversal_ids` such that:
#
# - Ancestors are [1], [1, 2], [1, 2, 3]
# - Descendants are [1, 2, 3, 4, *]
# - Root is [1]
# - Hierarchy is [1, *]
#
# Note that this search method works so long as the IDs are unique and the
# traversal path is ordered from root to leaf nodes.
#
# We implement this in the database using Postgresql arrays, indexed by a
# generalized inverted index (gin).
module Namespaces
  module Traversal
    module Linear
      extend ActiveSupport::Concern
      include LinearScopes

      UnboundedSearch = Class.new(StandardError)

      included do
        before_update :lock_both_roots, if: -> { parent_id_changed? }
        after_update :sync_traversal_ids, if: -> { saved_change_to_parent_id? }
        # This uses rails internal before_commit API to sync traversal_ids on namespace create, right before transaction is committed.
        # This helps reduce the time during which the root namespace record is locked to ensure updated traversal_ids are valid
        before_commit :sync_traversal_ids, on: [:create]
        after_commit :set_traversal_ids,
          if: -> { traversal_ids.empty? || saved_change_to_parent_id? },
          on: [:create, :update]

        define_model_callbacks :sync_traversal_ids
      end

      class_methods do
        # This method looks into a list of namespaces trying to optimise a returned traversal_ids
        # into a list of shortest prefixes, due to fact that the shortest prefixes include all childrens.
        # Example:
        # INPUT: [[4909902], [4909902,51065789], [4909902,51065793], [7135830], [15599674, 1], [15599674, 1, 3], [15599674, 2]]
        # RESULT: [[4909902], [7135830], [15599674, 1], [15599674, 2]]
        def shortest_traversal_ids_prefixes
          raise ArgumentError, 'Feature not supported since the `:use_traversal_ids` is disabled' unless use_traversal_ids?

          prefixes = []

          # The array needs to be sorted (O(nlogn)) to ensure shortest elements are always first
          # This allows to do O(n) search of shortest prefixes
          all_traversal_ids = all.order('namespaces.traversal_ids').pluck('namespaces.traversal_ids')
          last_prefix = [nil]

          all_traversal_ids.each do |traversal_ids|
            next if last_prefix == traversal_ids[0..(last_prefix.count - 1)]

            last_prefix = traversal_ids
            prefixes << traversal_ids
          end

          prefixes
        end
      end

      def traversal_ids=(ids)
        super(ids)
        self.transient_traversal_ids = nil
      end

      def traversal_ids
        read_attribute(:traversal_ids).presence || transient_traversal_ids || []
      end

      def use_traversal_ids?
        return false unless Feature.enabled?(:use_traversal_ids)

        traversal_ids.present?
      end

      def use_traversal_ids_for_self_and_hierarchy?
        return false unless use_traversal_ids?
        return false unless Feature.enabled?(:use_traversal_ids_for_self_and_hierarchy, root_ancestor)

        traversal_ids.present?
      end

      def use_traversal_ids_for_ancestors?
        return false unless use_traversal_ids?
        return false unless Feature.enabled?(:use_traversal_ids_for_ancestors, root_ancestor)

        traversal_ids.present?
      end

      def use_traversal_ids_for_ancestors_upto?
        return false unless use_traversal_ids?
        return false unless Feature.enabled?(:use_traversal_ids_for_ancestors_upto, root_ancestor)

        traversal_ids.present?
      end

      def root_ancestor
        strong_memoize(:root_ancestor) do
          if association(:parent).loaded? && parent.present?
            # This case is possible when parent has not been persisted or we're inside a transaction.
            parent.root_ancestor
          elsif parent_id.nil?
            # There is no parent, so we are the root ancestor.
            self
          else
            Namespace.find_by(id: traversal_ids.first)
          end
        end
      end

      def self_and_descendants
        return super unless use_traversal_ids?

        lineage(top: self)
      end

      def self_and_descendant_ids
        return super unless use_traversal_ids?

        self_and_descendants.as_ids
      end

      def descendants
        return super unless use_traversal_ids?

        self_and_descendants.where.not(id: id)
      end

      def self_and_hierarchy
        return super unless use_traversal_ids_for_self_and_hierarchy?

        self_and_descendants.or(ancestors)
      end

      def ancestors(hierarchy_order: nil)
        return super unless use_traversal_ids_for_ancestors?

        return self.class.none if parent_id.blank?

        lineage(bottom: parent, hierarchy_order: hierarchy_order)
      end

      def ancestor_ids(hierarchy_order: nil)
        return super unless use_traversal_ids_for_ancestors?

        hierarchy_order == :desc ? traversal_ids[0..-2] : traversal_ids[0..-2].reverse
      end

      # Returns all ancestors upto but excluding the top.
      # When no top is given, all ancestors are returned.
      # When top is not found, returns all ancestors.
      #
      # This copies the behavior of the recursive method. We will deprecate
      # this behavior soon.
      def ancestors_upto(top = nil, hierarchy_order: nil)
        return super unless use_traversal_ids_for_ancestors_upto?

        # We can't use a default value in the method definition above because
        # we need to preserve those specific parameters for super.
        hierarchy_order ||= :desc

        top_index = ancestors_upto_top_index(top)
        ids = traversal_ids[top_index...-1].reverse

        # WITH ORDINALITY lets us order the result to match traversal_ids order.
        ids_string = ids.map { |id| Integer(id) }.join(',')
        from_sql = <<~SQL
          unnest(ARRAY[#{ids_string}]::bigint[]) WITH ORDINALITY AS ancestors(id, ord)
          INNER JOIN namespaces ON namespaces.id = ancestors.id
        SQL

        self.class
          .from(Arel.sql(from_sql))
          .order('ancestors.ord': hierarchy_order)
      end

      def self_and_ancestors(hierarchy_order: nil)
        return super unless use_traversal_ids_for_ancestors?

        return self.class.where(id: id) if parent_id.blank?

        lineage(bottom: self, hierarchy_order: hierarchy_order)
      end

      def self_and_ancestor_ids(hierarchy_order: nil)
        return super unless use_traversal_ids_for_ancestors?

        hierarchy_order == :desc ? traversal_ids : traversal_ids.reverse
      end

      def parent=(obj)
        super(obj)
        set_traversal_ids
      end

      def parent_id=(id)
        super(id)
        set_traversal_ids
      end

      private

      attr_accessor :transient_traversal_ids

      # Update the traversal_ids for the full hierarchy.
      #
      # NOTE: self.traversal_ids will be stale. Reload for a fresh record.
      def sync_traversal_ids
        run_callbacks :sync_traversal_ids do
          # Clear any previously memoized root_ancestor as our ancestors have changed.
          clear_memoization(:root_ancestor)

          Namespace::TraversalHierarchy.for_namespace(self).sync_traversal_ids!
        end
      end

      def set_traversal_ids
        return if id.blank?

        # This is a temporary guard and will be removed.
        return if is_a?(Namespaces::ProjectNamespace)

        self.transient_traversal_ids = if parent_id
                                         parent.traversal_ids + [id]
                                       else
                                         [id]
                                       end

        # Clear root_ancestor memo if changed.
        if read_attribute(:traversal_ids)&.first != transient_traversal_ids.first
          clear_memoization(:root_ancestor)
        end

        # Update traversal_ids for any associated child objects.
        children.each(&:reload) if children.loaded?
      end

      # Lock the root of the hierarchy we just left, and lock the root of the hierarchy
      # we just joined. In most cases the two hierarchies will be the same.
      def lock_both_roots
        parent_ids = [
          parent_id_was || self.id,
          parent_id || self.id
        ].compact

        roots = Gitlab::ObjectHierarchy
          .new(Namespace.where(id: parent_ids))
          .base_and_ancestors
          .reorder(nil)
          .where(parent_id: nil)

        Namespace.lock.select(:id).where(id: roots).order(id: :asc).load
      end

      # Search this namespace's lineage. Bound inclusively by top node.
      def lineage(top: nil, bottom: nil, hierarchy_order: nil)
        raise UnboundedSearch, 'Must bound search by either top or bottom' unless top || bottom

        skope = self.class

        if top
          skope = skope.where("traversal_ids @> ('{?}')", top.id)
        end

        if bottom
          skope = skope.where(id: bottom.traversal_ids)
        end

        # The original `with_depth` attribute in ObjectHierarchy increments as you
        # walk away from the "base" namespace. This direction changes depending on
        # if you are walking up the ancestors or down the descendants.
        if hierarchy_order
          depth_sql = "ABS(#{traversal_ids.count} - array_length(traversal_ids, 1))"
          skope = skope.select(skope.default_select_columns, "#{depth_sql} as depth")
          # The SELECT includes an extra depth attribute. We wrap the SQL in a
          # standard SELECT to avoid mismatched attribute errors when trying to
          # chain future ActiveRelation commands, and retain the ordering.
          skope = self.class
            .from(skope, self.class.table_name)
            .select(skope.arel_table[Arel.star])
            .order(depth: hierarchy_order)
        end

        skope
      end

      def ancestors_upto_top_index(top)
        return 0 if top.nil?

        index = traversal_ids.find_index(top.id)
        if index.nil?
          0
        else
          index + 1
        end
      end
    end
  end
end