summaryrefslogtreecommitdiff
path: root/lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb
blob: 62f33bb56bca9175d26fbddadb13551d3e89a0d8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# frozen_string_literal: true

module Gitlab
  module Database
    module PartitioningMigrationHelpers
      module IndexHelpers
        include Gitlab::Database::MigrationHelpers
        include Gitlab::Database::SchemaHelpers

        DuplicatedIndexesError = Class.new(StandardError)

        ERROR_SCOPE = 'index'

        # Concurrently creates a new index on a partitioned table. In concept this works similarly to
        # `add_concurrent_index`, and won't block reads or writes on the table while the index is being built.
        #
        # A special helper is required for partitioning because Postgres does not support concurrently building indexes
        # on partitioned tables. This helper concurrently adds the same index to each partition, and creates the final
        # index on the parent table once all of the partitions are indexed. This is the recommended safe way to add
        # indexes to partitioned tables.
        #
        # Example:
        #
        #     add_concurrent_partitioned_index :users, :some_column
        #
        # See Rails' `add_index` for more info on the available arguments.
        def add_concurrent_partitioned_index(table_name, column_names, options = {})
          assert_not_in_transaction_block(scope: ERROR_SCOPE)

          raise ArgumentError, 'A name is required for indexes added to partitioned tables' unless options[:name]

          partitioned_table = find_partitioned_table(table_name)

          if index_name_exists?(table_name, options[:name])
            Gitlab::AppLogger.warn "Index not created because it already exists (this may be due to an aborted" \
              " migration or similar): table_name: #{table_name}, index_name: #{options[:name]}"

            return
          end

          partitioned_table.postgres_partitions.order(:name).each do |partition|
            partition_index_name = generated_index_name(partition.identifier, options[:name])
            partition_options = options.merge(name: partition_index_name, allow_partition: true)

            add_concurrent_index(partition.identifier, column_names, partition_options)
          end

          with_lock_retries do
            add_index(table_name, column_names, **options)
          end
        end

        # Safely removes an existing index from a partitioned table. The method name is a bit inaccurate as it does not
        # drop the index concurrently, but it's named as such to maintain consistency with other similar helpers, and
        # indicate that this should be safe to use in a production environment.
        #
        # In current versions of Postgres it's impossible to drop an index concurrently, or drop an index from an
        # individual partition that exists across the entire partitioned table. As a result this helper drops the index
        # from the parent table, which automatically cascades to all partitions. While this does require an exclusive
        # lock, dropping an index is a fast operation that won't block the table for a significant period of time.
        #
        # Example:
        #
        #     remove_concurrent_partitioned_index_by_name :users, 'index_name_goes_here'
        def remove_concurrent_partitioned_index_by_name(table_name, index_name)
          assert_not_in_transaction_block(scope: ERROR_SCOPE)

          find_partitioned_table(table_name)

          unless index_name_exists?(table_name, index_name)
            Gitlab::AppLogger.warn "Index not removed because it does not exist (this may be due to an aborted " \
              "migration or similar): table_name: #{table_name}, index_name: #{index_name}"

            return
          end

          with_lock_retries do
            remove_index(table_name, name: index_name)
          end
        end

        # Finds duplicate indexes for a given schema and table. This finds
        # indexes where the index definition is identical but the names are
        # different. Returns an array of arrays containing duplicate index name
        # pairs.
        #
        # Example:
        #
        #     find_duplicate_indexes('table_name_goes_here')
        def find_duplicate_indexes(table_name, schema_name: connection.current_schema)
          find_indexes(table_name, schema_name: schema_name)
            .group_by { |r| r['index_id'] }
            .select { |_, v| v.size > 1 }
            .map { |_, indexes| indexes.map { |index| index['index_name'] } }
        end

        # Retrieves a hash of index names for a given table and schema, by index
        # definition.
        #
        # Example:
        #
        #     indexes_by_definition_for_table('table_name_goes_here')
        #
        # Returns:
        #
        #     {
        #       "CREATE _ btree (created_at)" => "index_on_created_at"
        #     }
        def indexes_by_definition_for_table(table_name, schema_name: connection.current_schema)
          duplicate_indexes = find_duplicate_indexes(table_name, schema_name: schema_name)

          unless duplicate_indexes.empty?
            raise DuplicatedIndexesError, "#{table_name} has duplicate indexes: #{duplicate_indexes}"
          end

          find_indexes(table_name, schema_name: schema_name)
            .each_with_object({}) { |row, hash| hash[row['index_id']] = row['index_name'] }
        end

        # Renames indexes for a given table and schema, mapping by index
        # definition, to a hash of new index names.
        #
        # Example:
        #
        #     index_names = indexes_by_definition_for_table('source_table_name_goes_here')
        #     drop_table('source_table_name_goes_here')
        #     rename_indexes_for_table('destination_table_name_goes_here', index_names)
        def rename_indexes_for_table(table_name, new_index_names, schema_name: connection.current_schema)
          current_index_names = indexes_by_definition_for_table(table_name, schema_name: schema_name)
          rename_indexes(current_index_names, new_index_names, schema_name: schema_name)
        end

        private

        def find_indexes(table_name, schema_name: connection.current_schema)
          indexes = connection.select_all(<<~SQL, 'SQL', [schema_name, table_name])
            SELECT n.nspname AS schema_name,
                   c.relname AS table_name,
                   i.relname AS index_name,
                   regexp_replace(pg_get_indexdef(i.oid), 'INDEX .*? USING', '_') AS index_id
            FROM pg_index x
              JOIN pg_class c ON c.oid = x.indrelid
              JOIN pg_class i ON i.oid = x.indexrelid
              LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
            WHERE (c.relkind = ANY (ARRAY['r'::"char", 'm'::"char", 'p'::"char"]))
              AND (i.relkind = ANY (ARRAY['i'::"char", 'I'::"char"]))
              AND n.nspname = $1
              AND c.relname = $2;
          SQL

          indexes.to_a
        end

        def find_partitioned_table(table_name)
          partitioned_table = Gitlab::Database::PostgresPartitionedTable.find_by_name_in_current_schema(table_name)

          raise ArgumentError, "#{table_name} is not a partitioned table" unless partitioned_table

          partitioned_table
        end

        def generated_index_name(partition_name, index_name)
          object_name("#{partition_name}_#{index_name}", 'index')
        end

        def rename_indexes(from, to, schema_name: connection.current_schema)
          indexes_to_rename = from.select { |index_id, _| to.has_key?(index_id) }
          statements = indexes_to_rename.map do |index_id, index_name|
            <<~SQL
              ALTER INDEX #{connection.quote_table_name("#{schema_name}.#{connection.quote_column_name(index_name)}")}
                          RENAME TO #{connection.quote_column_name(to[index_id])}
            SQL
          end

          connection.execute(statements.join(';'))
        end
      end
    end
  end
end