summaryrefslogtreecommitdiff
path: root/lib/gitlab/database.rb
blob: 1895f0fab321f94039dacbcc194a9904f73f68a1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# frozen_string_literal: true

module Gitlab
  module Database
    # Names of the databases known to GitLab; returned by `all_database_names`.
    DATABASE_NAMES = %w[main ci].freeze

    # Individual database names. `CI_DATABASE_NAME` is what `ci_database?`
    # compares against.
    MAIN_DATABASE_NAME = 'main'
    CI_DATABASE_NAME = 'ci'

    # Number of extra pool connections `default_pool_size` adds on top of
    # `Gitlab::Runtime.max_threads` when `DB_POOL_HEADROOM` is not set.
    DEFAULT_POOL_HEADROOM = 10

    # This constant is used when renaming tables concurrently.
    # If you plan to rename a table using the `rename_table_safely` method, add your table here one milestone before the rename.
    # Example:
    # TABLES_TO_BE_RENAMED = {
    #   'old_name' => 'new_name'
    # }.freeze
    TABLES_TO_BE_RENAMED = {}.freeze

    # Minimum PostgreSQL version requirement per documentation:
    # https://docs.gitlab.com/ee/install/requirements.html#postgresql-requirements
    MINIMUM_POSTGRES_VERSION = 12

    # Bounds of a signed 32-bit integer (2**31 - 1 and -2**31), see:
    # https://www.postgresql.org/docs/9.2/static/datatype-numeric.html
    MAX_INT_VALUE = 2147483647
    MIN_INT_VALUE = -2147483648

    # The max value between MySQL's TIMESTAMP and PostgreSQL's timestamptz:
    # https://www.postgresql.org/docs/9.1/static/datatype-datetime.html
    # https://dev.mysql.com/doc/refman/5.7/en/datetime.html
    # Currently this is 2**31 - 1 seconds after the Unix epoch.
    # FIXME: this should just be the max value of timestamptz
    MAX_TIMESTAMP_VALUE = Time.at((1 << 31) - 1).freeze

    # The maximum number of characters for text fields, to avoid DoS attacks via parsing huge text fields
    # https://gitlab.com/gitlab-org/gitlab-foss/issues/61974
    MAX_TEXT_SIZE_LIMIT = 1_000_000

    # Minimum schema version from which migrations are supported
    # Migrations before this version may have been removed
    MIN_SCHEMA_VERSION = 20190506135400
    MIN_SCHEMA_GITLAB_VERSION = '11.11.0'

    # Schema we store dynamically managed partitions in (e.g. for time partitioning)
    DYNAMIC_PARTITIONS_SCHEMA = :gitlab_partitions_dynamic

    # Schema we store static partitions in (e.g. for hash partitioning)
    STATIC_PARTITIONS_SCHEMA = :gitlab_partitions_static

    # This is an extensive list of postgres schemas owned by GitLab
    # It does not include the default public schema
    EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA, STATIC_PARTITIONS_SCHEMA].freeze

    # Name (as a Symbol) of the database configuration used by
    # ActiveRecord::Base. Evaluated once, when this file is loaded.
    PRIMARY_DATABASE_NAME = ActiveRecord::Base.connection_db_config.name.to_sym

    # Returns a frozen, memoized mapping of database name to the base model
    # class that holds the connection for that database.
    #
    # `ci` is only present when ::Ci::ApplicationRecord is a connection class.
    def self.database_base_models
      @database_base_models ||= begin
        # Note that we use ActiveRecord::Base here and not ApplicationRecord.
        # This is deliberate, as we also use these classes to apply load
        # balancing to, and the load balancer must be enabled for _all_ models
        # that inherit from ActiveRecord::Base; not just our own models that
        # inherit from ApplicationRecord.
        models = { main: ::ActiveRecord::Base }
        models[:ci] = ::Ci::ApplicationRecord if ::Ci::ApplicationRecord.connection_class?

        models.with_indifferent_access.freeze
      end
    end

    # Returns, per gitlab_schema, the list of base models whose connection
    # serves that schema. The result is memoized and frozen.
    def self.schemas_to_base_models
      @schemas_to_base_models ||= begin
        base_models = database_base_models
        main_model = base_models.fetch(:main)
        # use CI or fallback to main
        ci_model = base_models[:ci] || main_model

        {
          gitlab_main: [main_model],
          gitlab_ci: [ci_model],
          gitlab_shared: base_models.values # all models
        }.with_indifferent_access.freeze
      end
    end

    # Returns the frozen list of database names declared in DATABASE_NAMES.
    def self.all_database_names
      DATABASE_NAMES
    end

    # The database connection pool size is derived from the configured
    # concurrency (Gitlab::Runtime.max_threads) plus some headroom, so that we
    # don't run out of connections when threads other than the 'user-facing'
    # ones are running.
    #
    # The headroom is taken from the DB_POOL_HEADROOM environment variable when
    # it is present, and from DEFAULT_POOL_HEADROOM otherwise.
    #
    # Read more about this in
    # doc/development/database/client_side_connection_pool.md
    def self.default_pool_size
      configured_headroom = ENV["DB_POOL_HEADROOM"].presence
      headroom = configured_headroom ? configured_headroom.to_i : DEFAULT_POOL_HEADROOM.to_i

      Gitlab::Runtime.max_threads + headroom
    end

    # Tells whether the database configuration for the current Rails
    # environment contains an entry named `database_name`.
    def self.has_config?(database_name)
      env_configuration = Gitlab::Application.config.database_configuration[Rails.env]

      env_configuration.include?(database_name.to_s)
    end

    # Tells whether `name` refers to the main database.
    def self.main_database?(name)
      # The database is `main` if it is the first entry in `database.yml`.
      # Rails internally names it `primary`; to avoid confusion with the
      # broad usage of `primary` we call it `main` instead.
      #
      # TODO: The explicit `== 'main'` is needed during a transition period,
      # until `database.yml` has been migrated to the `main:` syntax
      # https://gitlab.com/gitlab-org/gitlab/-/merge_requests/65243
      db_name = name.to_s

      ActiveRecord::Base.configurations.primary?(db_name) || db_name == 'main'
    end

    # Tells whether `name` (converted with `to_s`) equals CI_DATABASE_NAME.
    def self.ci_database?(name)
      name.to_s == CI_DATABASE_NAME
    end

    # Minimal model over the `pg_user` relation, keyed by `usename`.
    # Used by `check_for_non_superuser` to look up the current database user.
    class PgUser < ApplicationRecord
      self.table_name = 'pg_user'
      self.primary_key = :usename
    end

    # rubocop: disable CodeReuse/ActiveRecord
    # Ensures the current database user is not a superuser.
    #
    # Logs the user name and its superuser flag through Gitlab::AppLogger.
    #
    # Raises RuntimeError ('Error: detected superuser') when the user is a
    # superuser, and RuntimeError ('User CURRENT_USER not found') when the
    # user cannot be looked up, either because the query fails or because no
    # matching row exists.
    def self.check_for_non_superuser
      user = PgUser.find_by('usename = CURRENT_USER')

      # `find_by` returns nil when no row matches. Report that with the
      # dedicated error instead of failing with NoMethodError on nil below.
      raise 'User CURRENT_USER not found' unless user

      am_i_superuser = user.usesuper

      Gitlab::AppLogger.info(
        "Account details: User: \"#{user.usename}\", UseSuper: (#{am_i_superuser})"
      )

      raise 'Error: detected superuser' if am_i_superuser
    rescue ActiveRecord::StatementInvalid
      raise 'User CURRENT_USER not found'
    end
    # rubocop: enable CodeReuse/ActiveRecord

    # Prints a red warning banner (via Kernel.warn) for every database in
    # `database_base_models` whose PostgreSQL version is below the supported
    # minimum. Does nothing when running under the Rails runner.
    #
    # The banner text is built in two steps: Ruby interpolates
    # `database.version` and `name` into the heredoc first, and the `<% %>`
    # tags are evaluated by ERB afterwards.
    def self.check_postgres_version_and_print_warning
      return if Gitlab::Runtime.rails_runner?

      database_base_models.each do |name, model|
        database = Gitlab::Database::Reflection.new(model)

        next if database.postgresql_minimum_supported_version?

        Kernel.warn ERB.new(Rainbow.new.wrap(<<~EOS).red).result

                    ██     ██  █████  ██████  ███    ██ ██ ███    ██  ██████ 
                    ██     ██ ██   ██ ██   ██ ████   ██ ██ ████   ██ ██      
                    ██  █  ██ ███████ ██████  ██ ██  ██ ██ ██ ██  ██ ██   ███ 
                    ██ ███ ██ ██   ██ ██   ██ ██  ██ ██ ██ ██  ██ ██ ██    ██ 
                     ███ ███  ██   ██ ██   ██ ██   ████ ██ ██   ████  ██████  

          ******************************************************************************
            You are using PostgreSQL #{database.version} for the #{name} database, but PostgreSQL >= <%= Gitlab::Database::MINIMUM_POSTGRES_VERSION %>
            is required for this version of GitLab.
            <% if Rails.env.development? || Rails.env.test? %>
            If using gitlab-development-kit, please find the relevant steps here:
              https://gitlab.com/gitlab-org/gitlab-development-kit/-/blob/main/doc/howto/postgresql.md#upgrade-postgresql
            <% end %>
            Please upgrade your environment to a supported PostgreSQL version, see
            https://docs.gitlab.com/ee/install/requirements.html#database for details.
          ******************************************************************************
        EOS
      # This rescue belongs to the `each` block, so a failure for one database
      # does not prevent the remaining databases from being checked.
      rescue ActiveRecord::ActiveRecordError, PG::Error
        # ignore - happens when Rake tasks yet have to create a database, e.g. for testing
      end
    end

    # Returns the string "RANDOM()", presumably for embedding in SQL
    # (e.g. ORDER BY) - confirm against callers.
    def self.random
      "RANDOM()"
    end

    # Returns the quoted literal 't', presumably the SQL representation of a
    # boolean true - confirm against callers.
    def self.true_value
      "'t'"
    end

    # Returns the quoted literal 'f', presumably the SQL representation of a
    # boolean false - confirm against callers.
    def self.false_value
      "'f'"
    end

    # Caps `timestamp` at MAX_TIMESTAMP_VALUE.
    #
    # Returns `timestamp` itself when it is strictly earlier than the maximum,
    # and an unfrozen copy of MAX_TIMESTAMP_VALUE otherwise.
    def self.sanitize_timestamp(timestamp)
      return timestamp if MAX_TIMESTAMP_VALUE > timestamp

      MAX_TIMESTAMP_VALUE.dup
    end

    # Runs `block` with query caching disabled on every base model returned
    # by `database_base_models`, and returns the block's result.
    def self.all_uncached(&block)
      # Calls to #uncached only disable caching for the current connection. Since the load balancer
      # can potentially upgrade from read to read-write mode (using a different connection), we specify
      # up-front that we'll explicitly use the primary for the duration of the operation.
      Gitlab::Database::LoadBalancing::Session.current.use_primary do
        # Wrap the block once per model; the last model ends up outermost.
        nested = block

        database_base_models.values.each do |model|
          inner = nested
          nested = -> { model.uncached(&inner) }
        end

        nested.call
      end
    end

    # Yields to the given block and returns its result. The required `url:`
    # keyword is not used by this implementation.
    def self.allow_cross_joins_across_databases(url:)
      # this method is implemented in:
      # spec/support/database/prevent_cross_joins.rb
      yield
    end

    # Registers the `post_migrate` directory of every configured `db` path as
    # a migration path, unless it is registered already.
    #
    # Does nothing when SKIP_POST_DEPLOYMENT_MIGRATIONS is set, unless
    # `force: true` is given.
    def self.add_post_migrate_path_to_rails(force: false)
      skip_requested = ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS']
      return if skip_requested && !force

      Rails.application.config.paths['db'].each do |db_path|
        post_migrate_path = Rails.root.join(db_path, 'post_migrate').to_s
        migrate_paths = Rails.application.config.paths['db/migrate']

        next if migrate_paths.include?(post_migrate_path)

        migrate_paths << post_migrate_path

        # Rails memoizes migrations at certain points where it won't read the above
        # path just yet. As such we must also update the following list of paths.
        ActiveRecord::Migrator.migrations_paths << post_migrate_path
      end
    end

    # Returns the names of the database configurations for the current Rails
    # environment, with 'geo' left out.
    def self.db_config_names
      env_configs = ::ActiveRecord::Base.configurations.configs_for(env_name: Rails.env)
      config_names = env_configs.map(&:name)

      config_names.reject { |config_name| config_name == 'geo' }
    end

    # Returns (as Symbols) all gitlab_schemas that the given connection can use.
    #
    # Since `ActiveRecord::Base` might change the connection (from main to ci),
    # this does not look at literal connection names. Instead it compares the
    # base models that hold the connection for a given db_config_name.
    def self.gitlab_schemas_for_connection(connection)
      primary_model = database_base_models.fetch(db_config_name(connection).to_sym)

      matching_schemas = schemas_to_base_models.select do |_schema, child_models|
        child_models.any? do |child_model|
          next true if child_model == primary_model

          # The model might indicate a child connection, ensure that this is enclosed in a `db_config`
          shared_with = db_config_share_with(child_model.connection_db_config)
          database_base_models[shared_with] == primary_model
        end
      end

      matching_schemas.keys.map!(&:to_sym)
    end

    # Returns the db_config behind `connection`, or nil when there is no
    # connection or its pool does not expose a db_config.
    #
    # For a load balancer ConnectionProxy, the primary db_config of the load
    # balancer configuration is returned.
    def self.db_config_for_connection(connection)
      return unless connection

      proxy_class = ::Gitlab::Database::LoadBalancing::ConnectionProxy
      return connection.load_balancer.configuration.primary_db_config if connection.is_a?(proxy_class)

      # During application init we might receive `NullPool`
      pool_has_config = connection.respond_to?(:pool) && connection.pool.respond_to?(:db_config)

      connection.pool.db_config if pool_has_config
    end

    # Returns the name of the db_config behind `connection`, or 'unknown' when
    # no db_config (or no name) can be determined.
    #
    # At the moment, the connection can only be retrieved by
    # Gitlab::Database::LoadBalancer#read or #read_write or from
    # ActiveRecord directly. Therefore, if the load balancer doesn't
    # recognize the connection, this method returns the primary role
    # directly. In future, we may need to check for other sources.
    # Expected returned names:
    # main, main_replica, ci, ci_replica, unknown
    def self.db_config_name(connection)
      config = db_config_for_connection(connection)
      return 'unknown' if config.nil?

      config.name || 'unknown'
    end

    # Returns the name of the database a db_config shares its configuration
    # with, or nil when it does not share.
    #
    # Currently the database configuration can only be shared with `main:`,
    # which is the case when `database_tasks: false` is being used.
    # This is to be refined: https://gitlab.com/gitlab-org/gitlab/-/issues/356580
    def self.db_config_share_with(db_config)
      # A config that runs its own database tasks is not shared (nil).
      'main' unless db_config.database_tasks?
    end

    # Always returns false in this implementation.
    # NOTE(review): the `prepend_mod_with` call at the end of this file may
    # allow an extension to override this - confirm.
    def self.read_only?
      false
    end

    # The negation of `read_only?`.
    def self.read_write?
      !read_only?
    end

    # Monkeypatch rails with upgraded database observability:
    # prepends ActiveRecordBaseTransactionMetrics to ActiveRecord::Base.
    def self.install_transaction_metrics_patches!
      ActiveRecord::Base.prepend(ActiveRecordBaseTransactionMetrics)
    end

    # Prepends the transaction context modules to ActiveRecord's
    # TransactionManager and RealTransaction classes.
    def self.install_transaction_context_patches!
      adapters = ActiveRecord::ConnectionAdapters

      adapters::TransactionManager.prepend(TransactionManagerContext)
      adapters::RealTransaction.prepend(RealTransactionContext)
    end

    # MonkeyPatch for ActiveRecord::Base for adding observability
    module ActiveRecordBaseTransactionMetrics
      extend ActiveSupport::Concern

      class_methods do
        # A patch over ActiveRecord::Base.transaction that provides
        # observability into transactional methods: sub-transactions are
        # counted, and every call is wrapped in a
        # 'transaction.active_record' notification.
        def transaction(**options, &block)
          transaction_type = get_transaction_type(connection.transaction_open?, options[:requires_new])

          if transaction_type == :sub_transaction
            ::Gitlab::Database::Metrics.subtransactions_increment(self.name)
          end

          payload = { connection: connection, transaction_type: transaction_type }

          ActiveSupport::Notifications.instrument('transaction.active_record', payload) { super(**options, &block) }
        end

        private

        # Classifies a transaction call:
        # - :real_transaction when no transaction is open yet
        # - :sub_transaction when one is open and a new one is required
        # - :fake_transaction when one is open and it is simply reused
        def get_transaction_type(transaction_open, requires_new_flag)
          return :real_transaction unless transaction_open

          requires_new_flag ? :sub_transaction : :fake_transaction
        end
      end
    end

    # rubocop:disable Gitlab/ModuleWithInstanceVariables
    # Exposes the GitLab transaction context of the first transaction on the
    # manager's stack.
    module TransactionManagerContext
      # Returns nil when the stack is empty or when its first entry does not
      # provide a `gitlab_transaction_context`.
      def transaction_context
        first_transaction = @stack.first
        return unless first_transaction.respond_to?(:gitlab_transaction_context)

        first_transaction.gitlab_transaction_context
      end
    end

    # Attaches a GitLab transaction context to a transaction and notifies it
    # about commits and rollbacks before delegating to the original methods.
    module RealTransactionContext
      # Lazily created context, one per transaction instance.
      def gitlab_transaction_context
        @gitlab_transaction_context ||= ::Gitlab::Database::Transaction::Context.new
      end

      # Tells the context about the commit, then performs it.
      def commit
        context = gitlab_transaction_context
        context.commit

        super
      end

      # Tells the context about the rollback, then performs it.
      def rollback
        context = gitlab_transaction_context
        context.rollback

        super
      end
    end
    # rubocop:enable Gitlab/ModuleWithInstanceVariables
  end
end

# NOTE(review): `prepend_mod_with` is defined outside this file; presumably it
# prepends an extension module for Gitlab::Database when one exists - confirm.
Gitlab::Database.prepend_mod_with('Gitlab::Database')