summaryrefslogtreecommitdiff
path: root/lib/gitlab/background_migration/fix_projects_without_prometheus_service.rb
blob: 97a9913fa74488d0af6424336267fc8819ffe464 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # This migration re-activates inconsistent Prometheus service records and
    # creates missing ones for the projects within the given range of ids
    class FixProjectsWithoutPrometheusService
      # A single query must finish within the 15s statement timeout, while the background
      # migration worker holds a lease of at least 2 minutes. To reconcile the two, the
      # scheduled id range (sized for at least 2 minutes of processing) is processed in
      # smaller portions, each small enough to fit under the 15s limit.
      # https://gitlab.com/gitlab-com/gl-infra/infrastructure/issues/9064#note_279857215
      MAX_BATCH_SIZE = 1_000
      # Column name => value used for a new service row when no template service
      # exists (see PrometheusService.values). The keys double as the INSERT column
      # list and the values are joined verbatim into the SELECT list, which is why
      # string values carry their own SQL single quotes.
      DEFAULTS = {
        'active' => true,
        'properties' => "'{}'",
        'type' => "'PrometheusService'",
        'template' => false,
        'push_events' => true,
        'issues_events' => true,
        'merge_requests_events' => true,
        'tag_push_events' => true,
        'note_events' => true,
        'category' => "'monitoring'",
        'default' => false,
        'wiki_page_events' => true,
        'pipeline_events' => true,
        'confidential_issues_events' => true,
        'commit_events' => true,
        'job_events' => true,
        'confidential_note_events' => true
      }.freeze

      module Migratable
        module Applications
          # Migration-local holder of the Prometheus application status codes,
          # isolated from application code.
          class Prometheus
            class << self
              # @return [Hash{Symbol => Integer}] status name => code compared
              #   against the `clusters_applications_prometheus.status` column
              def statuses
                { errored: -1, installed: 3, updated: 5 }
              end
            end
          end
        end

        # Migration-local model over the `clusters` table, isolated from
        # application code.
        class Cluster < ActiveRecord::Base
          self.table_name = 'clusters'

          enum cluster_type: { instance_type: 1, group_type: 2 }

          # Tells whether any cluster in the current scope is joined to a
          # `clusters_applications_prometheus` row whose status is either
          # "installed" or "updated".
          #
          # @return [Boolean]
          def self.has_prometheus_application?
            installed, updated = Applications::Prometheus.statuses.values_at(:installed, :updated)

            with_prometheus = joins("INNER JOIN clusters_applications_prometheus ON clusters_applications_prometheus.cluster_id = clusters.id
                   AND clusters_applications_prometheus.status IN (#{installed}, #{updated})")
            with_prometheus.exists?
          end
        end

        # Migration model namespace isolated from application code.
        #
        # Maps the Prometheus rows of the `services` table. `inheritance_column`
        # is pointed away from `type`, so that column is read as a plain attribute.
        class PrometheusService < ActiveRecord::Base
          self.inheritance_column = :_type_disabled
          self.table_name = 'services'
          default_scope { where(type: type) } # rubocop:disable Cop/DefaultScope

          # @return [String] value of the `type` column that identifies these rows
          def self.type
            'PrometheusService'
          end

          # @return [PrometheusService, nil] the row flagged as template, if there is one
          def self.template
            find_by(template: true)
          end

          # SQL value fragments for a new service row. They are taken from the
          # template service when one exists and from DEFAULTS otherwise; either
          # way `template` is forced to false and `active` to true. The values
          # have to line up with DEFAULTS.keys, which is the INSERT column list.
          #
          # @return [Array] values ready to be joined into a SELECT list
          def self.values
            (template&.attributes_for_insert || DEFAULTS).merge('template' => false, 'active' => true).values
          end

          # Renders this record's DEFAULTS columns as SQL literals.
          #
          # Strings are quoted by the database connection so that embedded
          # apostrophes (e.g. inside JSON `properties`) are escaped instead of
          # terminating the literal, and nil becomes NULL instead of leaving an
          # empty slot in the generated SELECT list. Other values (booleans) are
          # passed through unchanged.
          #
          # @return [Hash] column name => SQL literal
          def attributes_for_insert
            slice(DEFAULTS.keys).transform_values do |value|
              case value
              when String then self.class.connection.quote(value)
              when nil then 'NULL'
              else value
              end
            end
          end
        end

        # Migration-local model over the `projects` table, isolated from
        # application code.
        class Project < ActiveRecord::Base
          self.table_name = 'projects'

          # SELECT list feeding the INSERT into services: the project id, the
          # service values, then UTC timestamps for created_at and updated_at.
          scope :select_for_insert, lambda {
            service_values = PrometheusService.values.join(',')

            select('id')
              .select(service_values)
              .select("TIMEZONE('UTC', NOW()) as created_at", "TIMEZONE('UTC', NOW()) as updated_at")
          }

          # LEFT JOINs the Prometheus service of each project, limited to
          # services whose project_id lies within the given (inclusive) range.
          scope :with_prometheus_services, lambda { |from_id, to_id|
            first_id = Integer(from_id)
            last_id = Integer(to_id)
            service_type = PrometheusService.type

            joins("LEFT JOIN services ON services.project_id = projects.id AND services.project_id BETWEEN #{first_id} AND #{last_id}
                    AND services.type = '#{service_type}'")
          }

          # Keeps only projects whose namespace has a group cluster with a
          # Prometheus application in the "installed" or "updated" status.
          scope :with_group_prometheus_installed, lambda {
            installed, updated = Applications::Prometheus.statuses.values_at(:installed, :updated)

            joins("INNER JOIN cluster_groups ON cluster_groups.group_id = projects.namespace_id")
              .joins("INNER JOIN clusters_applications_prometheus ON clusters_applications_prometheus.cluster_id = cluster_groups.cluster_id
                      AND clusters_applications_prometheus.status IN (#{installed}, #{updated})")
          }
        end
      end

      # Walks the inclusive id range in consecutive slices of at most
      # MAX_BATCH_SIZE ids and processes each slice on its own.
      #
      # @param from_id [Integer] first project id of the range
      # @param to_id [Integer] last project id of the range
      def perform(from_id, to_id)
        Range.new(from_id, to_id).each_slice(MAX_BATCH_SIZE) do |ids|
          lower, upper = ids.values_at(0, -1)
          process_batch(lower, upper)
        end
      end

      private

      # Handles one slice of ids: inconsistent services are updated first,
      # then the missing ones are created.
      def process_batch(from_id, to_id)
        bounds = [from_id, to_id]

        update_inconsistent(*bounds)
        create_missing(*bounds)
      end

      # Executes the insert statement for one slice and, when the query yields
      # a row, logs the number of created records it reports.
      def create_missing(from_id, to_id)
        row = ApplicationRecord.connection.select_one(create_sql(from_id, to_id))

        if row
          logger.info(message: "#{self.class}: created missing services for #{row['number_of_created_records']} projects in id=#{from_id}...#{to_id}")
        end
      end

      # Executes the update statement for one slice and, when the query yields
      # a row, logs the number of updated records it reports.
      def update_inconsistent(from_id, to_id)
        row = ApplicationRecord.connection.select_one(update_sql(from_id, to_id))

        if row
          logger.info(message: "#{self.class}: updated inconsistent services for #{row['number_of_updated_records']} projects in id=#{from_id}...#{to_id}")
        end
      end

      # Builds the statement that inserts the missing service rows for one slice
      # and returns how many rows were inserted as `number_of_created_records`.
      #
      # There is no unique constraint on the (project_id, type) pair, which
      # prevents us from using ON CONFLICT.
      def create_sql(from_id, to_id)
        materialized = Gitlab::Database::AsWithMaterialized.materialized_if_supported
        quoted_columns = DEFAULTS.keys.map { |column| "\"#{column}\"" }.join(',')
        rows_to_insert = select_insert_values_sql(from_id, to_id)

        <<~SQL
          WITH created_records AS #{materialized} (
            INSERT INTO services (project_id, #{quoted_columns}, created_at, updated_at)
            #{rows_to_insert}
            RETURNING *
          )
          SELECT COUNT(*) as number_of_created_records
          FROM created_records
        SQL
      end

      # Builds the statement that activates, within one slice of project ids,
      # the inactive Prometheus services whose properties are '{}' and that
      # satisfy the cluster condition; it returns how many rows were changed
      # as `number_of_updated_records`.
      def update_sql(from_id, to_id)
        materialized = Gitlab::Database::AsWithMaterialized.materialized_if_supported
        first_id = Integer(from_id)
        last_id = Integer(to_id)
        service_type = Migratable::PrometheusService.type
        cluster_condition = group_cluster_condition(from_id, to_id)

        <<~SQL
          WITH updated_records AS #{materialized} (
            UPDATE services SET active = TRUE
            WHERE services.project_id BETWEEN #{first_id} AND #{last_id} AND services.properties = '{}' AND services.type = '#{service_type}'
            AND #{cluster_condition} AND services.active = FALSE
            RETURNING *
          )
          SELECT COUNT(*) as number_of_updated_records
          FROM updated_records
        SQL
      end

      # SQL predicate restricting which services the update may touch.
      #
      # When an instance-level cluster has Prometheus, no restriction applies
      # and a tautology is returned. Otherwise the predicate holds only for
      # services whose own project sits in a group with Prometheus installed:
      # the subquery is correlated with the outer `services` row through
      # `projects.id = services.project_id`. Without that correlation a single
      # matching project anywhere in the slice would make the predicate true
      # for every service in the slice.
      #
      # @param from_id [Integer] first project id of the slice (inclusive)
      # @param to_id [Integer] last project id of the slice (inclusive)
      # @return [String] SQL fragment usable inside a WHERE clause on `services`
      def group_cluster_condition(from_id, to_id)
        return '1 = 1' if migrate_instance_cluster?

        projects_with_group_prometheus = Migratable::Project
          .select(1)
          .with_group_prometheus_installed
          .where('projects.id = services.project_id')
          .where('projects.id BETWEEN ? AND ?', Integer(from_id), Integer(to_id))

        <<~SQL
          EXISTS (
            #{projects_with_group_prometheus.to_sql}
          )
        SQL
      end

      # SELECT statement producing one row of insert values per project of the
      # slice that has no Prometheus service yet. Unless an instance-level
      # cluster has Prometheus, only projects with group-level Prometheus
      # are included.
      #
      # @return [String] SQL
      def select_insert_values_sql(from_id, to_id)
        candidates = Migratable::Project.select_for_insert.with_prometheus_services(from_id, to_id)
        candidates = candidates.where("projects.id BETWEEN ? AND ? AND services.id IS NULL", Integer(from_id), Integer(to_id))
        candidates = candidates.with_group_prometheus_installed unless migrate_instance_cluster?

        candidates.to_sql
      end

      # Background migration logger, built on first use and reused afterwards.
      def logger
        return @logger if @logger

        @logger = Gitlab::BackgroundMigration::Logger.build
      end

      # Whether an instance-type cluster has a Prometheus application. The
      # lookup runs once per migration object; the answer is cached even when
      # it is false, hence the `defined?` check rather than `||=`.
      def migrate_instance_cluster?
        return @migrate_instance_cluster if defined?(@migrate_instance_cluster)

        @migrate_instance_cluster = Migratable::Cluster.instance_type.has_prometheus_application?
      end
    end
  end
end