summaryrefslogtreecommitdiff
path: root/lib/gitlab/database/reindexing/reindex_concurrently.rb
blob: 60fa4deda3904b98ae80928685f9266b6508926d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# frozen_string_literal: true

module Gitlab
  module Database
    module Reindexing
      # This is a >= PG12 reindexing strategy based on `REINDEX CONCURRENTLY`
      class ReindexConcurrently
        # Raised by #perform when the given index cannot be reindexed by this strategy.
        ReindexError = Class.new(StandardError)

        # Regex source (shared between Ruby and SQL-side matching) for the suffix
        # found on temporary indexes, e.g. `_ccnew` or `_ccnew1`.
        TEMPORARY_INDEX_PATTERN = '\_ccnew[0-9]*'
        # Statement timeout applied for the duration of the REINDEX statement.
        STATEMENT_TIMEOUT = 24.hours
        # Maximum length of an index name; used to account for name truncation
        # when a suffix is appended to a long index name.
        PG_MAX_INDEX_NAME_LENGTH = 63

        attr_reader :index, :logger

        # index  - the index to rebuild; must respond to the methods used below
        #          (name, schema, identifier, tablename, connection, ...).
        # logger - receives structured progress messages.
        def initialize(index, logger: Gitlab::AppLogger)
          @index = index
          @logger = logger
        end

        # Rebuilds the index with `REINDEX INDEX CONCURRENTLY` and afterwards
        # drops any temporary indexes left behind for it.
        #
        # Raises ReindexError (before touching the database) when the index
        # serves an exclusion constraint, is itself a left-over temporary index,
        # or is an expression index.
        def perform
          raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?

          # Deliberately unanchored: any name containing the temporary marker is rejected.
          if index.name.match?(/#{TEMPORARY_INDEX_PATTERN}/o)
            raise ReindexError, 'index is a left-over temporary index from a previous reindexing run'
          end

          # Expression indexes require additional statistics in `pg_statistic`:
          # select * from pg_statistic where starelid = (select oid from pg_class where relname = 'some_index');
          #
          # In PG12, this has been fixed in https://gitlab.com/postgres/postgres/-/commit/b17ff07aa3eb142d2cde2ea00e4a4e8f63686f96.
          # Discussion happened in https://www.postgresql.org/message-id/flat/CAFcNs%2BqpFPmiHd1oTXvcPdvAHicJDA9qBUSujgAhUMJyUMb%2BSA%40mail.gmail.com
          # following a GitLab.com incident that surfaced this (https://gitlab.com/gitlab-com/gl-infra/production/-/issues/2885).
          #
          # While this has been backpatched, we continue to disable expression indexes until further review.
          raise ReindexError, 'expression indexes are currently not supported' if index.expression?

          begin
            with_logging do
              set_statement_timeout do
                execute("REINDEX INDEX CONCURRENTLY #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
              end
            end
          ensure
            # Runs on success and on failure, so an aborted REINDEX does not
            # leave its temporary index behind.
            cleanup_dangling_indexes
          end
        end

        private

        # Logs a structured "Starting"/"Finished" pair around the given block,
        # including size figures and the block's wall-clock duration.
        # The "Finished" entry is only written when the block does not raise.
        def with_logging
          # Captured up front so the "Finished" entry can report the pre-reindex values.
          bloat_size = index.bloat_size
          ondisk_size_before = index.ondisk_size_bytes

          logger.info(
            message: "Starting reindex of #{index}",
            index: index.identifier,
            table: index.tablename,
            estimated_bloat_bytes: bloat_size,
            index_size_before_bytes: ondisk_size_before,
            relative_bloat_level: index.relative_bloat_level
          )

          duration = Benchmark.realtime do
            yield
          end

          # NOTE(review): presumably discards cached attributes so the sizes read
          # below reflect the rebuilt index - confirm against the index class.
          index.reset

          logger.info(
            message: "Finished reindex of #{index}",
            index: index.identifier,
            table: index.tablename,
            estimated_bloat_bytes: bloat_size,
            index_size_before_bytes: ondisk_size_before,
            index_size_after_bytes: index.ondisk_size_bytes,
            relative_bloat_level: index.relative_bloat_level,
            duration_s: duration.round(2)
          )
        end

        # Drops temporary indexes whose name could have been derived from the
        # name of the index being reindexed.
        def cleanup_dangling_indexes
          Gitlab::Database::PostgresIndex.match("#{TEMPORARY_INDEX_PATTERN}$").each do |lingering_index|
            # Example lingering index name: some_index_ccnew1

            # Example suffix: '_ccnew1'
            #
            # Anchored to the end of the name: only the trailing marker is the
            # temporary suffix. An earlier occurrence (e.g. in
            # 'foo_ccnew_bar_ccnew') belongs to the original index name and
            # must not be stripped, otherwise the temporary index of
            # 'foo_ccnew_bar' would be mistaken for one of 'foo_bar'.
            suffix = lingering_index.name[/#{TEMPORARY_INDEX_PATTERN}\z/o]

            # Skip names the Ruby-side pattern does not recognise instead of
            # failing inside the caller's `ensure`.
            next unless suffix

            # Example prefix: 'some_index'
            prefix = lingering_index.name.chomp(suffix)

            # Only remove if the lingering index name could have been chosen
            # as a result of a REINDEX operation (considering that PostgreSQL
            # truncates index names to 63 chars and adds a suffix).
            if index.name[0...PG_MAX_INDEX_NAME_LENGTH - suffix.length] == prefix
              remove_index(lingering_index)
            end
          end
        end

        # Drops the given dangling index with `DROP INDEX CONCURRENTLY IF EXISTS`,
        # wrapped in lock retries. Exhausting the retries does not raise
        # (`raise_on_exhaustion: false`).
        def remove_index(dangling_index)
          logger.info("Removing dangling index #{dangling_index.identifier}")

          # REMOVE_INDEX_RETRY_CONFIG is not defined in this class; it is
          # expected to resolve from an enclosing module.
          retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
            connection: connection,
            timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
            klass: self.class,
            logger: logger
          )

          retries.run(raise_on_exhaustion: false) do
            execute("DROP INDEX CONCURRENTLY IF EXISTS #{quote_table_name(dangling_index.schema)}.#{quote_table_name(dangling_index.name)}")
          end
        end

        # Sets the session's statement timeout to STATEMENT_TIMEOUT (in seconds)
        # while the block runs and resets it afterwards, even if the block raises.
        def set_statement_timeout
          execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
          yield
        ensure
          execute('RESET statement_timeout')
        end

        delegate :execute, :quote_table_name, to: :connection

        # Connection of the index being reindexed, memoized on first use.
        def connection
          @connection ||= index.connection
        end
      end
    end
  end
end