summary refs log tree commit diff
path: root/lib/gitlab/background_migration/backfill_namespace_traversal_ids_children.rb
blob: 79e7a2f2279f01f295df84538d24617680e664a5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # A job that sets namespaces.traversal_ids, one sub-batch at a time, for
    # every namespace that has a parent and whose traversal_ids is not yet set.
    # rubocop:disable Style/Documentation
    class BackfillNamespaceTraversalIdsChildren
      # Lightweight model mapped to the +namespaces+ table. It mixes in
      # ::EachBatch (presumably the provider of the +each_batch+ call that
      # #perform makes on this model's relations).
      class Namespace < ActiveRecord::Base
        self.table_name = 'namespaces'

        include ::EachBatch

        # Rows that have a parent, i.e. whose parent_id is not NULL.
        scope(:base_query, -> { where.not(parent_id: nil) })
      end

      # Number of seconds #perform sleeps after each sub-batch UPDATE.
      PAUSE_SECONDS = 0.1

      # Fills in +traversal_ids+ for namespaces that have a parent and whose id
      # lies in start_id..end_id (both ends inclusive), one sub-batch at a time.
      #
      # start_id       - lowest namespace id handled by this job
      # end_id         - highest namespace id handled by this job
      # sub_batch_size - size passed to +each_batch+ for every iteration
      #
      # Once every sub-batch has been processed the job is recorded as
      # succeeded.
      def perform(start_id, end_id, sub_batch_size)
        children_in_range = Namespace.base_query.where(id: start_id..end_id)

        children_in_range.each_batch(of: sub_batch_size) do |relation|
          # Reduce the yielded relation to its id bounds and query on those.
          min_id, max_id = relation.pluck(Arel.sql('min(id), max(id)')).first
          id_window = Namespace.unscoped.base_query.where(id: min_id..max_id)
          ancestry_subquery = calculated_traversal_ids(id_window)

          # Only rows whose traversal_ids is still the empty array are written.
          ActiveRecord::Base.connection.execute(<<~SQL)
            UPDATE namespaces
            SET traversal_ids = calculated_ids.traversal_ids
            FROM #{ancestry_subquery} calculated_ids
            WHERE namespaces.id = calculated_ids.id
              AND namespaces.traversal_ids = '{}'
          SQL

          # Pause between sub-batches (presumably to limit database load).
          sleep(PAUSE_SECONDS)
        end

        # Every argument is passed along because
        # `queue_background_migration_jobs_by_range_at_intervals` tracks the
        # job by its complete argument list.
        mark_job_as_succeeded(start_id, end_id, sub_batch_size)
      end

      private

      # Calculate the ancestor path for a given set of namespaces.
      # Builds a parenthesised SQL subquery that yields one row per namespace
      # in +batch+: its +id+ and the +traversal_ids+ array for it.
      #
      # batch - any object responding to +to_sql+ whose query exposes +id+ and
      #         +parent_id+ columns (#perform passes a Namespace relation).
      #
      # The recursive CTE starts each namespace at height 1 as itself, then
      # repeatedly joins +namespaces+ on the current row's parent_id, adding
      # one to the height per step. Aggregating by descending height places
      # the last ancestor reached first and the namespace itself last.
      #
      # Returns the subquery as a String.
      def calculated_traversal_ids(batch)
        batch_sql = batch.to_sql

        <<~SQL
          (
            WITH RECURSIVE cte(source_id, namespace_id, parent_id, height) AS (
              (
                SELECT batch.id, batch.id, batch.parent_id, 1
                FROM (#{batch_sql}) AS batch
              )
              UNION ALL
              (
                SELECT cte.source_id, n.id, n.parent_id, cte.height+1
                FROM namespaces n, cte
                WHERE n.id = cte.parent_id
              )
            )
            SELECT flat_hierarchy.source_id as id,
                   array_agg(flat_hierarchy.namespace_id ORDER BY flat_hierarchy.height DESC) as traversal_ids
            FROM (SELECT * FROM cte FOR UPDATE) flat_hierarchy
            GROUP BY flat_hierarchy.source_id
          )
        SQL
      end

      # Passes this job's class name and the given arguments to
      # Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded.
      #
      # arguments - the argument list the job was run with
      def mark_job_as_succeeded(*arguments)
        job_class_name = 'BackfillNamespaceTraversalIdsChildren'

        Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(job_class_name, arguments)
      end
    end
  end
end