summaryrefslogtreecommitdiff
path: root/lib/gitlab/database.rb
blob: 59249c8bc1f438cf61a99657c4b3a901554050b1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# frozen_string_literal: true

module Gitlab
  module Database
    # This constant is used when renaming tables concurrently.
    # If you plan to rename a table using the `rename_table_safely` method, add your table here one milestone before the rename.
    # Example:
    # TABLES_TO_BE_RENAMED = {
    #   'old_name' => 'new_name'
    # }.freeze
    TABLES_TO_BE_RENAMED = {
      'analytics_instance_statistics_measurements' => 'analytics_usage_trends_measurements'
    }.freeze

    # Minimum PostgreSQL version requirement per documentation:
    # https://docs.gitlab.com/ee/install/requirements.html#postgresql-requirements
    MINIMUM_POSTGRES_VERSION = 11

    # https://www.postgresql.org/docs/9.2/static/datatype-numeric.html
    MAX_INT_VALUE = 2147483647
    MIN_INT_VALUE = -2147483648

    # The max value between MySQL's TIMESTAMP and PostgreSQL's timestampz:
    # https://www.postgresql.org/docs/9.1/static/datatype-datetime.html
    # https://dev.mysql.com/doc/refman/5.7/en/datetime.html
    # FIXME: this should just be the max value of timestampz
    MAX_TIMESTAMP_VALUE = Time.at((1 << 31) - 1).freeze

    # The maximum number of characters for text fields, to avoid DoS attacks via parsing huge text fields
    # https://gitlab.com/gitlab-org/gitlab-foss/issues/61974
    MAX_TEXT_SIZE_LIMIT = 1_000_000

    # Minimum schema version from which migrations are supported
    # Migrations before this version may have been removed
    MIN_SCHEMA_VERSION = 20190506135400
    MIN_SCHEMA_GITLAB_VERSION = '11.11.0'

    # Schema we store dynamically managed partitions in (e.g. for time partitioning)
    DYNAMIC_PARTITIONS_SCHEMA = :gitlab_partitions_dynamic

    # Schema we store static partitions in (e.g. for hash partitioning)
    STATIC_PARTITIONS_SCHEMA = :gitlab_partitions_static

    # This is an extensive list of postgres schemas owned by GitLab
    # It does not include the default public schema
    EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA, STATIC_PARTITIONS_SCHEMA].freeze

    DEFAULT_POOL_HEADROOM = 10

    # We configure the database connection pool size automatically based on the
    # configured concurrency. We also add some headroom, to make sure we don't run
    # out of connections when more threads besides the 'user-facing' ones are
    # running.
    #
    # Read more about this in doc/development/database/client_side_connection_pool.md
    def self.default_pool_size
      headroom = (ENV["DB_POOL_HEADROOM"].presence || DEFAULT_POOL_HEADROOM).to_i

      Gitlab::Runtime.max_threads + headroom
    end

    def self.config
      default_config_hash = ActiveRecord::Base.configurations.find_db_config(Rails.env)&.config || {}

      default_config_hash.with_indifferent_access.tap do |hash|
        # Match config/initializers/database_config.rb
        hash[:pool] ||= default_pool_size
      end
    end

    def self.username
      config['username'] || ENV['USER']
    end

    def self.database_name
      config['database']
    end

    def self.adapter_name
      config['adapter']
    end

    def self.human_adapter_name
      if postgresql?
        'PostgreSQL'
      else
        'Unknown'
      end
    end

    # @deprecated
    def self.postgresql?
      adapter_name.casecmp('postgresql') == 0
    end

    def self.read_only?
      false
    end

    def self.read_write?
      !self.read_only?
    end

    # Check whether the underlying database is in read-only mode
    def self.db_read_only?
      pg_is_in_recovery =
        ActiveRecord::Base
          .connection
          .execute('SELECT pg_is_in_recovery()')
          .first
          .fetch('pg_is_in_recovery')

      Gitlab::Utils.to_boolean(pg_is_in_recovery)
    end

    def self.db_read_write?
      !self.db_read_only?
    end

    def self.version
      @version ||= database_version.match(/\A(?:PostgreSQL |)([^\s]+).*\z/)[1]
    end

    def self.postgresql_minimum_supported_version?
      version.to_f >= MINIMUM_POSTGRES_VERSION
    end

    def self.check_postgres_version_and_print_warning
      return if Gitlab::Database.postgresql_minimum_supported_version?
      return if Gitlab::Runtime.rails_runner?

      Kernel.warn ERB.new(Rainbow.new.wrap(<<~EOS).red).result

                  ██     ██  █████  ██████  ███    ██ ██ ███    ██  ██████ 
                  ██     ██ ██   ██ ██   ██ ████   ██ ██ ████   ██ ██      
                  ██  █  ██ ███████ ██████  ██ ██  ██ ██ ██ ██  ██ ██   ███ 
                  ██ ███ ██ ██   ██ ██   ██ ██  ██ ██ ██ ██  ██ ██ ██    ██ 
                   ███ ███  ██   ██ ██   ██ ██   ████ ██ ██   ████  ██████  

        ******************************************************************************
          You are using PostgreSQL <%= Gitlab::Database.version %>, but PostgreSQL >= <%= Gitlab::Database::MINIMUM_POSTGRES_VERSION %>
          is required for this version of GitLab.
          <% if Rails.env.development? || Rails.env.test? %>
          If using gitlab-development-kit, please find the relevant steps here:
            https://gitlab.com/gitlab-org/gitlab-development-kit/-/blob/master/doc/howto/postgresql.md#upgrade-postgresql
          <% end %>
          Please upgrade your environment to a supported PostgreSQL version, see
          https://docs.gitlab.com/ee/install/requirements.html#database for details.
        ******************************************************************************
      EOS
    rescue ActiveRecord::ActiveRecordError, PG::Error
      # ignore - happens when Rake tasks yet have to create a database, e.g. for testing
    end

    def self.nulls_order(field, direction = :asc, nulls_order = :nulls_last)
      raise ArgumentError unless [:nulls_last, :nulls_first].include?(nulls_order)
      raise ArgumentError unless [:asc, :desc].include?(direction)

      case nulls_order
      when :nulls_last then nulls_last_order(field, direction)
      when :nulls_first then nulls_first_order(field, direction)
      end
    end

    def self.nulls_last_order(field, direction = 'ASC')
      Arel.sql("#{field} #{direction} NULLS LAST")
    end

    def self.nulls_first_order(field, direction = 'ASC')
      Arel.sql("#{field} #{direction} NULLS FIRST")
    end

    def self.random
      "RANDOM()"
    end

    def self.true_value
      "'t'"
    end

    def self.false_value
      "'f'"
    end

    def self.with_connection_pool(pool_size)
      pool = create_connection_pool(pool_size)

      begin
        yield(pool)
      ensure
        pool.disconnect!
      end
    end

    # Bulk inserts a number of rows into a table, optionally returning their
    # IDs.
    #
    # table - The name of the table to insert the rows into.
    # rows - An Array of Hash instances, each mapping the columns to their
    #        values.
    # return_ids - When set to true the return value will be an Array of IDs of
    #              the inserted rows
    # disable_quote - A key or an Array of keys to exclude from quoting (You
    #                 become responsible for protection from SQL injection for
    #                 these keys!)
    # on_conflict - Defines an upsert. Values can be: :disabled (default) or
    #               :do_nothing
    def self.bulk_insert(table, rows, return_ids: false, disable_quote: [], on_conflict: nil)
      return if rows.empty?

      keys = rows.first.keys
      columns = keys.map { |key| connection.quote_column_name(key) }

      disable_quote = Array(disable_quote).to_set
      tuples = rows.map do |row|
        keys.map do |k|
          disable_quote.include?(k) ? row[k] : connection.quote(row[k])
        end
      end

      sql = <<-EOF
        INSERT INTO #{table} (#{columns.join(', ')})
        VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')}
      EOF

      sql = "#{sql} ON CONFLICT DO NOTHING" if on_conflict == :do_nothing

      sql = "#{sql} RETURNING id" if return_ids

      result = connection.execute(sql)

      if return_ids
        result.values.map { |tuple| tuple[0].to_i }
      else
        []
      end
    end

    def self.sanitize_timestamp(timestamp)
      MAX_TIMESTAMP_VALUE > timestamp ? timestamp : MAX_TIMESTAMP_VALUE.dup
    end

    # pool_size - The size of the DB pool.
    # host - An optional host name to use instead of the default one.
    def self.create_connection_pool(pool_size, host = nil, port = nil)
      original_config = Gitlab::Database.config

      env_config = original_config.merge(pool: pool_size)
      env_config[:host] = host if host
      env_config[:port] = port if port

      ActiveRecord::ConnectionAdapters::ConnectionHandler.new.establish_connection(env_config)
    end

    def self.connection
      ActiveRecord::Base.connection
    end
    private_class_method :connection

    def self.cached_column_exists?(table_name, column_name)
      connection.schema_cache.columns_hash(table_name).has_key?(column_name.to_s)
    end

    def self.cached_table_exists?(table_name)
      exists? && connection.schema_cache.data_source_exists?(table_name)
    end

    def self.database_version
      row = connection.execute("SELECT VERSION()").first

      row['version']
    end

    def self.exists?
      connection

      true
    rescue StandardError
      false
    end

    def self.system_id
      row = connection.execute('SELECT system_identifier FROM pg_control_system()').first

      row['system_identifier']
    end

    # @param [ActiveRecord::Connection] ar_connection
    # @return [String]
    def self.get_write_location(ar_connection)
      use_new_load_balancer_query = Gitlab::Utils.to_boolean(ENV['USE_NEW_LOAD_BALANCER_QUERY'], default: false)

      sql = if use_new_load_balancer_query
              <<~NEWSQL
                SELECT CASE
                    WHEN pg_is_in_recovery() = true AND EXISTS (SELECT 1 FROM pg_stat_get_wal_senders())
                      THEN pg_last_wal_replay_lsn()::text
                    WHEN pg_is_in_recovery() = false
                      THEN pg_current_wal_insert_lsn()::text
                      ELSE NULL
                    END AS location;
              NEWSQL
            else
              <<~SQL
                SELECT pg_current_wal_insert_lsn()::text AS location
              SQL
            end

      row = ar_connection.select_all(sql).first
      row['location'] if row
    end

    private_class_method :database_version

    def self.add_post_migrate_path_to_rails(force: false)
      return if ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS'] && !force

      Rails.application.config.paths['db'].each do |db_path|
        path = Rails.root.join(db_path, 'post_migrate').to_s

        unless Rails.application.config.paths['db/migrate'].include? path
          Rails.application.config.paths['db/migrate'] << path

          # Rails memoizes migrations at certain points where it won't read the above
          # path just yet. As such we must also update the following list of paths.
          ActiveRecord::Migrator.migrations_paths << path
        end
      end
    end

    # inside_transaction? will return true if the caller is running within a transaction. Handles special cases
    # when running inside a test environment, where tests may be wrapped in transactions
    def self.inside_transaction?
      if Rails.env.test?
        ActiveRecord::Base.connection.open_transactions > open_transactions_baseline
      else
        ActiveRecord::Base.connection.open_transactions > 0
      end
    end

    # These methods that access @open_transactions_baseline are not thread-safe.
    # These are fine though because we only call these in RSpec's main thread. If we decide to run
    # specs multi-threaded, we would need to use something like ThreadGroup to keep track of this value
    def self.set_open_transactions_baseline
      @open_transactions_baseline = ActiveRecord::Base.connection.open_transactions
    end

    def self.reset_open_transactions_baseline
      @open_transactions_baseline = 0
    end

    def self.open_transactions_baseline
      @open_transactions_baseline ||= 0
    end
    private_class_method :open_transactions_baseline

    # Monkeypatch rails with upgraded database observability
    def self.install_monkey_patches
      ActiveRecord::Base.prepend(ActiveRecordBaseTransactionMetrics)
    end

    # MonkeyPatch for ActiveRecord::Base for adding observability
    module ActiveRecordBaseTransactionMetrics
      extend ActiveSupport::Concern

      class_methods do
        # A monkeypatch over ActiveRecord::Base.transaction.
        # It provides observability into transactional methods.
        def transaction(**options, &block)
          ActiveSupport::Notifications.instrument('transaction.active_record', { connection: connection }) do
            super(**options, &block)
          end
        end
      end
    end
  end
end

Gitlab::Database.prepend_mod_with('Gitlab::Database')