summaryrefslogtreecommitdiff
path: root/lib/gitlab/object_hierarchy.rb
blob: 9a74266693bfc854feb0027b02e03e0580c38c2f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# frozen_string_literal: true

module Gitlab
  # Retrieving of parent or child objects based on a base ActiveRecord relation.
  #
  # This class uses recursive CTEs and as a result will only work on PostgreSQL.
  class ObjectHierarchy
    DEPTH_COLUMN = :depth

    attr_reader :ancestors_base, :descendants_base, :model, :options

    # ancestors_base - An instance of ActiveRecord::Relation for which to
    #                  get parent objects.
    # descendants_base - An instance of ActiveRecord::Relation for which to
    #                    get child objects. If omitted, ancestors_base is used.
    def initialize(ancestors_base, descendants_base = ancestors_base, options: {})
      raise ArgumentError.new("Model of ancestors_base does not match model of descendants_base") if ancestors_base.model != descendants_base.model

      @ancestors_base = ancestors_base
      @descendants_base = descendants_base
      @model = ancestors_base.model
      @options = options
    end

    # Returns the set of descendants of a given relation, but excluding the given
    # relation
    # rubocop: disable CodeReuse/ActiveRecord
    def descendants
      base_and_descendants.where.not(id: descendants_base.select(:id))
    end
    # rubocop: enable CodeReuse/ActiveRecord

    # Returns the maximum depth starting from the base
    # A base object with no children has a maximum depth of `1`
    def max_descendants_depth
      base_and_descendants(with_depth: true).maximum(DEPTH_COLUMN)
    end

    # Returns the set of ancestors of a given relation, but excluding the given
    # relation
    #
    # Passing an `upto` will stop the recursion once the specified parent_id is
    # reached. So all ancestors *lower* than the specified ancestor will be
    # included.
    # rubocop: disable CodeReuse/ActiveRecord
    def ancestors(upto: nil, hierarchy_order: nil)
      base_and_ancestors(upto: upto, hierarchy_order: hierarchy_order).where.not(id: ancestors_base.select(:id))
    end
    # rubocop: enable CodeReuse/ActiveRecord

    # Returns a relation that includes the ancestors_base set of objects
    # and all their ancestors (recursively).
    #
    # Passing an `upto` will stop the recursion once the specified parent_id is
    # reached. So all ancestors *lower* than the specified ancestor will be
    # included.
    #
    # Passing a `hierarchy_order` with either `:asc` or `:desc` will cause the
    # recursive query order from most nested object to root or from the root
    # ancestor to most nested object respectively. This uses a `depth` column
    # where `1` is defined as the depth for the base and increment as we go up
    # each parent.
    #
    # Note: By default the order is breadth-first
    # rubocop: disable CodeReuse/ActiveRecord
    def base_and_ancestors(upto: nil, hierarchy_order: nil)
      if use_distinct?
        expose_depth = hierarchy_order.present?
        hierarchy_order ||= :asc

        # if hierarchy_order is given, the calculated `depth` should be present in SELECT
        if expose_depth
          recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all).distinct
          read_only(model.from(Arel::Nodes::As.new(recursive_query.arel, objects_table)).order(depth: hierarchy_order))
        else
          recursive_query = base_and_ancestors_cte(upto).apply_to(model.all)

          if skip_ordering?
            recursive_query = recursive_query.distinct
          else
            recursive_query = recursive_query.reselect(*recursive_query.arel.projections, 'ROW_NUMBER() OVER () as depth').distinct
            recursive_query = model.from(Arel::Nodes::As.new(recursive_query.arel, objects_table))
            recursive_query = remove_depth_and_maintain_order(recursive_query, hierarchy_order: hierarchy_order)
          end

          read_only(recursive_query)
        end
      else
        recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all)
        recursive_query = recursive_query.order(depth: hierarchy_order) if hierarchy_order
        read_only(recursive_query)
      end
    end
    # rubocop: enable CodeReuse/ActiveRecord

    # Returns a relation that includes the descendants_base set of objects
    # and all their descendants (recursively).
    #
    # When `with_depth` is `true`, a `depth` column is included where it starts with `1` for the base objects
    # and incremented as we go down the descendant tree
    # rubocop: disable CodeReuse/ActiveRecord
    def base_and_descendants(with_depth: false)
      if use_distinct?
        # Always calculate `depth`, remove it later if with_depth is false
        if with_depth
          base_cte = base_and_descendants_cte(with_depth: true).apply_to(model.all).distinct
          read_only(model.from(Arel::Nodes::As.new(base_cte.arel, objects_table)).order(depth: :asc))
        else
          base_cte = base_and_descendants_cte.apply_to(model.all)

          if skip_ordering?
            base_cte = base_cte.distinct
          else
            base_cte = base_cte.reselect(*base_cte.arel.projections, 'ROW_NUMBER() OVER () as depth').distinct
            base_cte = model.from(Arel::Nodes::As.new(base_cte.arel, objects_table))
            base_cte = remove_depth_and_maintain_order(base_cte, hierarchy_order: :asc)
          end

          read_only(base_cte)
        end
      else
        read_only(base_and_descendants_cte(with_depth: with_depth).apply_to(model.all))
      end
    end
    # rubocop: enable CodeReuse/ActiveRecord

    # Returns a relation that includes the base objects, their ancestors,
    # and the descendants of the base objects.
    #
    # The resulting query will roughly look like the following:
    #
    #     WITH RECURSIVE ancestors AS ( ... ),
    #       descendants AS ( ... )
    #     SELECT *
    #     FROM (
    #       SELECT *
    #       FROM ancestors namespaces
    #
    #       UNION
    #
    #       SELECT *
    #       FROM descendants namespaces
    #     ) groups;
    #
    # Using this approach allows us to further add criteria to the relation with
    # Rails thinking it's selecting data the usual way.
    #
    # If nested objects are not supported, ancestors_base is returned.
    # rubocop: disable CodeReuse/ActiveRecord
    def all_objects
      ancestors = base_and_ancestors_cte
      descendants = base_and_descendants_cte

      ancestors_table = ancestors.alias_to(objects_table)
      descendants_table = descendants.alias_to(objects_table)

      ancestors_scope = model.unscoped.from(ancestors_table)
      descendants_scope = model.unscoped.from(descendants_table)

      if use_distinct?
        ancestors_scope = ancestors_scope.distinct
        descendants_scope = descendants_scope.distinct
      end

      relation = model
        .unscoped
        .with
        .recursive(ancestors.to_arel, descendants.to_arel)
        .from_union([
          ancestors_scope,
          descendants_scope
        ])

      read_only(relation)
    end
    # rubocop: enable CodeReuse/ActiveRecord

    private

    # Use distinct on the Namespace queries to avoid bad planner behavior in PG11.
    def use_distinct?
      return unless model <= Namespace
      # Global use_distinct_for_all_object_hierarchy takes precedence over use_distinct_in_object_hierarchy
      return true if Feature.enabled?(:use_distinct_for_all_object_hierarchy)
      return options[:use_distinct] if options.key?(:use_distinct)

      false
    end

    # Skips the extra ordering when using distinct on the namespace queries
    def skip_ordering?
      return options[:skip_ordering] if options.key?(:skip_ordering)

      false
    end

    # Remove the extra `depth` field using an INNER JOIN to avoid breaking UNION queries
    # and ordering the rows based on the `depth` column to maintain the row order.
    #
    # rubocop: disable CodeReuse/ActiveRecord
    def remove_depth_and_maintain_order(relation, hierarchy_order: :asc)
      joined_relation = model.joins("INNER JOIN (#{relation.select(:id, :depth).to_sql}) namespaces_join_table on namespaces_join_table.id = #{model.table_name}.id").order("namespaces_join_table.depth" => hierarchy_order)

      model.from(Arel::Nodes::As.new(joined_relation.arel, objects_table))
    end
    # rubocop: enable CodeReuse/ActiveRecord

    # rubocop: disable CodeReuse/ActiveRecord
    def base_and_ancestors_cte(stop_id = nil, hierarchy_order = nil)
      cte = SQL::RecursiveCTE.new(:base_and_ancestors)

      base_query = ancestors_base.except(:order)
      base_query = base_query.select("1 as #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if hierarchy_order

      cte << base_query

      # Recursively get all the ancestors of the base set.
      parent_query = model
        .from(from_tables(cte))
        .where(ancestor_conditions(cte))
        .except(:order)

      if hierarchy_order
        quoted_objects_table_name = model.connection.quote_table_name(objects_table.name)

        parent_query = parent_query.select(
          cte.table[DEPTH_COLUMN] + 1,
          "tree_path || #{quoted_objects_table_name}.id",
          "#{quoted_objects_table_name}.id = ANY(tree_path)",
          objects_table[Arel.star]
        ).where(cte.table[:tree_cycle].eq(false))
      end

      parent_query = parent_query.where(parent_id_column(cte).not_eq(stop_id)) if stop_id

      cte << parent_query
      cte
    end
    # rubocop: enable CodeReuse/ActiveRecord

    # rubocop: disable CodeReuse/ActiveRecord
    def base_and_descendants_cte(with_depth: false)
      cte = SQL::RecursiveCTE.new(:base_and_descendants)

      base_query = descendants_base.except(:order)
      base_query = base_query.select("1 AS #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if with_depth

      cte << base_query

      # Recursively get all the descendants of the base set.
      descendants_query = model
        .from(from_tables(cte))
        .where(descendant_conditions(cte))
        .except(:order)

      if with_depth
        quoted_objects_table_name = model.connection.quote_table_name(objects_table.name)

        descendants_query = descendants_query.select(
          cte.table[DEPTH_COLUMN] + 1,
          "tree_path || #{quoted_objects_table_name}.id",
          "#{quoted_objects_table_name}.id = ANY(tree_path)",
          objects_table[Arel.star]
        ).where(cte.table[:tree_cycle].eq(false))
      end

      cte << descendants_query
      cte
    end
    # rubocop: enable CodeReuse/ActiveRecord

    def objects_table
      model.arel_table
    end

    def parent_id_column(cte)
      cte.table[:parent_id]
    end

    def from_tables(cte)
      [objects_table, cte.table]
    end

    def ancestor_conditions(cte)
      objects_table[:id].eq(cte.table[:parent_id])
    end

    def descendant_conditions(cte)
      objects_table[:parent_id].eq(cte.table[:id])
    end

    def read_only(relation)
      # relations using a CTE are not safe to use with update_all as it will
      # throw away the CTE, hence we mark them as read-only.
      relation.extend(Gitlab::Database::ReadOnlyRelation)
      relation
    end
  end
end

Gitlab::ObjectHierarchy.prepend_if_ee('EE::Gitlab::ObjectHierarchy')