summaryrefslogtreecommitdiff
path: root/lib/gitlab/database/reindexing/reindex_concurrently.rb
blob: 7a720f7c539204a0363ed5dddaf58ede0f73ac8d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# frozen_string_literal: true

module Gitlab
  module Database
    module Reindexing
      # This is a >= PG12 reindexing strategy based on `REINDEX CONCURRENTLY`.
      #
      # `#perform` rebuilds a single index, logs size/bloat figures before and
      # after the rebuild and, once the rebuild has been attempted (whether it
      # succeeded or raised), drops temporary `..._ccnew<N>` indexes that could
      # have been created for this index.
      class ReindexConcurrently
        ReindexError = Class.new(StandardError)

        # Regex source describing the suffix of temporary indexes
        # (example name: some_index_ccnew1). It is kept as a String because it
        # is also handed to `PostgresIndex.match`.
        TEMPORARY_INDEX_PATTERN = '\_ccnew[0-9]*'

        # TEMPORARY_INDEX_PATTERN anchored to the end of the name. Used to split
        # a lingering index name into prefix and suffix, so that a `_ccnew`
        # fragment in the middle of a name is never treated as the suffix.
        TEMPORARY_INDEX_SUFFIX_REGEX = /#{TEMPORARY_INDEX_PATTERN}\z/.freeze

        # Value used for `statement_timeout` while the REINDEX statement runs.
        STATEMENT_TIMEOUT = 9.hours

        # Maximum index name length used when checking whether a lingering index
        # name could have been derived from the name of the index at hand.
        PG_MAX_INDEX_NAME_LENGTH = 63

        attr_reader :index, :logger

        # index  - the index to rebuild; must respond to the readers used below
        #          (name, schema, identifier, tablename, exclusion?, expression?,
        #          bloat_size, ondisk_size_bytes, relative_bloat_level, reset).
        # logger - receives structured (Hash) and plain (String) info messages.
        def initialize(index, logger: Gitlab::AppLogger)
          @index = index
          @logger = logger
        end

        # Rebuilds the index with `REINDEX INDEX CONCURRENTLY`.
        #
        # Raises ReindexError (before any SQL is executed and without running the
        # cleanup) if the index serves an exclusion constraint, looks like a
        # left-over temporary index, or is an expression index.
        def perform
          raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?
          raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if /#{TEMPORARY_INDEX_PATTERN}/.match?(index.name)

          # Expression indexes require additional statistics in `pg_statistic`:
          # select * from pg_statistic where starelid = (select oid from pg_class where relname = 'some_index');
          #
          # In PG12, this has been fixed in https://gitlab.com/postgres/postgres/-/commit/b17ff07aa3eb142d2cde2ea00e4a4e8f63686f96.
          # Discussion happened in https://www.postgresql.org/message-id/flat/CAFcNs%2BqpFPmiHd1oTXvcPdvAHicJDA9qBUSujgAhUMJyUMb%2BSA%40mail.gmail.com
          # following a GitLab.com incident that surfaced this (https://gitlab.com/gitlab-com/gl-infra/production/-/issues/2885).
          #
          # While this has been backpatched, we continue to disable expression indexes until further review.
          raise ReindexError, 'expression indexes are currently not supported' if index.expression?

          begin
            with_logging do
              set_statement_timeout do
                execute("REINDEX INDEX CONCURRENTLY #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
              end
            end
          ensure
            # Runs after success and after failure of the rebuild alike.
            cleanup_dangling_indexes
          end
        end

        private

        # Logs a "Starting" entry, yields, then logs a "Finished" entry that
        # additionally carries the size after the rebuild and the duration.
        # The "Finished" entry is only written when the block did not raise.
        def with_logging
          # Captured up front so both log entries report the pre-rebuild values.
          bloat_size = index.bloat_size
          ondisk_size_before = index.ondisk_size_bytes

          logger.info(
            message: "Starting reindex of #{index}",
            index: index.identifier,
            table: index.tablename,
            estimated_bloat_bytes: bloat_size,
            index_size_before_bytes: ondisk_size_before,
            relative_bloat_level: index.relative_bloat_level
          )

          duration = Benchmark.realtime do
            yield
          end

          # Reset the index object before reading the post-rebuild figures below.
          index.reset

          logger.info(
            message: "Finished reindex of #{index}",
            index: index.identifier,
            table: index.tablename,
            estimated_bloat_bytes: bloat_size,
            index_size_before_bytes: ondisk_size_before,
            index_size_after_bytes: index.ondisk_size_bytes,
            relative_bloat_level: index.relative_bloat_level,
            duration_s: duration.round(2)
          )
        end

        # Drops temporary indexes whose name could have been derived from the
        # name of the index handled by this instance.
        def cleanup_dangling_indexes
          Gitlab::Database::PostgresIndex.match("#{TEMPORARY_INDEX_PATTERN}$").each do |lingering_index|
            # Example lingering index name: some_index_ccnew1

            # Example suffix: '_ccnew1'
            # Only the trailing occurrence counts; it is the part the candidates
            # were selected by above.
            suffix = lingering_index.name[TEMPORARY_INDEX_SUFFIX_REGEX]
            next unless suffix

            # Example prefix: 'some_index'
            # `sub` with the anchored regex strips the trailing suffix only and
            # leaves any earlier `_ccnew` fragment of the name intact.
            prefix = lingering_index.name.sub(TEMPORARY_INDEX_SUFFIX_REGEX, '')

            # Only remove if the lingering index name could have been chosen
            # as a result of a REINDEX operation (considering that PostgreSQL
            # truncates index names to 63 chars and adds a suffix).
            if index.name[0...PG_MAX_INDEX_NAME_LENGTH - suffix.length] == prefix
              remove_index(lingering_index)
            end
          end
        end

        # Drops the given index with `DROP INDEX CONCURRENTLY IF EXISTS`, wrapped
        # in lock retries. Exhausting the retries does not raise
        # (`raise_on_exhaustion: false`).
        #
        # The parameter is deliberately not called `index` so that it does not
        # shadow the `index` reader of this class.
        def remove_index(dangling_index)
          logger.info("Removing dangling index #{dangling_index.identifier}")

          # NOTE(review): REMOVE_INDEX_RETRY_CONFIG is not defined in this class;
          # presumably it is resolved from an enclosing namespace - confirm.
          retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
            timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
            klass: self.class,
            logger: logger
          )

          retries.run(raise_on_exhaustion: false) do
            execute("DROP INDEX CONCURRENTLY IF EXISTS #{quote_table_name(dangling_index.schema)}.#{quote_table_name(dangling_index.name)}")
          end
        end

        # Runs the block through Gitlab::Database::WithLockRetries, raising when
        # the retries are exhausted. Not called from within this class.
        def with_lock_retries(&block)
          arguments = { klass: self.class, logger: logger }
          Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
        end

        # Sets `statement_timeout` to STATEMENT_TIMEOUT (in seconds), yields, and
        # issues `RESET statement_timeout` afterwards, also when the block raises.
        def set_statement_timeout
          execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
          yield
        ensure
          execute('RESET statement_timeout')
        end

        delegate :execute, :quote_table_name, to: :connection

        # Memoized connection used for all SQL issued by this class.
        def connection
          @connection ||= ActiveRecord::Base.connection
        end
      end
    end
  end
end