summaryrefslogtreecommitdiff
path: root/lib/gitlab/database/tables_truncate.rb
blob: daef04027421965a363d8935a4a5cae0d1f8e2ce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# frozen_string_literal: true

module Gitlab
  module Database
    class TablesTruncate
      GITLAB_SCHEMAS_TO_IGNORE = %i[gitlab_geo].freeze

      def initialize(database_name:, min_batch_size:, logger: nil, until_table: nil, dry_run: false)
        @database_name = database_name
        @min_batch_size = min_batch_size
        @logger = logger
        @until_table = until_table
        @dry_run = dry_run
      end

      def execute
        raise "Cannot truncate legacy tables in single-db setup" if single_database_setup?
        raise "database is not supported" unless %w[main ci].include?(database_name)

        logger&.info "DRY RUN:" if dry_run

        schemas_for_connection = Gitlab::Database.gitlab_schemas_for_connection(connection)
        tables_to_truncate = Gitlab::Database::GitlabSchema.tables_to_schema.reject do |_, schema_name|
          GITLAB_SCHEMAS_TO_IGNORE.union(schemas_for_connection).include?(schema_name)
        end.keys

        Gitlab::Database::SharedModel.using_connection(connection) do
          Postgresql::DetachedPartition.find_each do |detached_partition|
            next if GITLAB_SCHEMAS_TO_IGNORE.union(schemas_for_connection).include?(detached_partition.table_schema)

            tables_to_truncate << detached_partition.fully_qualified_table_name
          end
        end

        tables_sorted = Gitlab::Database::TablesSortedByForeignKeys.new(connection, tables_to_truncate).execute
        # Checking if all the tables have the write-lock triggers
        # to make sure we are deleting the right tables on the right database.
        tables_sorted.flatten.each do |table_name|
          lock_writes_manager = Gitlab::Database::LockWritesManager.new(
            table_name: table_name,
            connection: connection,
            database_name: database_name,
            with_retries: true,
            logger: logger,
            dry_run: dry_run
          )

          unless lock_writes_manager.table_locked_for_writes?
            raise "Table '#{table_name}' is not locked for writes. Run the rake task gitlab:db:lock_writes first"
          end
        end

        if until_table
          table_index = tables_sorted.find_index { |tables_group| tables_group.include?(until_table) }
          raise "The table '#{until_table}' is not within the truncated tables" if table_index.nil?

          tables_sorted = tables_sorted[0..table_index]
        end

        # min_batch_size is the minimum number of new tables to truncate at each stage.
        # But in each stage we have also have to truncate the already truncated tables in the previous stages
        logger&.info "Truncating legacy tables for the database #{database_name}"
        truncate_tables_in_batches(tables_sorted)
      end

      private

      attr_accessor :database_name, :min_batch_size, :logger, :dry_run, :until_table

      def connection
        @connection ||= Gitlab::Database.database_base_models[database_name].connection
      end

      def truncate_tables_in_batches(tables_sorted)
        truncated_tables = []

        tables_sorted.flatten.each do |table|
          table_name_without_schema = ActiveRecord::ConnectionAdapters::PostgreSQL::Utils
            .extract_schema_qualified_name(table)
            .identifier

          sql_statement = "SELECT set_config('lock_writes.#{table_name_without_schema}', 'false', false)"
          logger&.info(sql_statement)
          connection.execute(sql_statement) unless dry_run

          # Temporarily unlocking writes on the attached partitions of the table.
          # Because in some cases they might have been locked for writes as well, when they used to be
          # normal tables before being converted into attached partitions.
          Gitlab::Database::SharedModel.using_connection(connection) do
            table_partitions = Gitlab::Database::PostgresPartition.for_parent_table(table_name_without_schema)
            table_partitions.each do |table_partition|
              partition_name_without_schema = ActiveRecord::ConnectionAdapters::PostgreSQL::Utils
                .extract_schema_qualified_name(table_partition.identifier)
                .identifier

              sql_statement = "SELECT set_config('lock_writes.#{partition_name_without_schema}', 'false', false)"
              logger&.info(sql_statement)
              connection.execute(sql_statement) unless dry_run
            end
          end
        end

        # We do the truncation in stages to avoid high IO
        # In each stage, we truncate the new tables along with the already truncated
        # tables before. That's because PostgreSQL doesn't allow to truncate any table (A)
        # without truncating any other table (B) that has a Foreign Key pointing to the table (A).
        # even if table (B) is empty, because it has been already truncated in a previous stage.
        tables_sorted.in_groups_of(min_batch_size, false).each do |tables_groups|
          new_tables_to_truncate = tables_groups.flatten
          logger&.info "= New tables to truncate: #{new_tables_to_truncate.join(', ')}"
          truncated_tables.push(*new_tables_to_truncate).tap(&:sort!)
          sql_statements = [
            "SET LOCAL statement_timeout = 0",
            "SET LOCAL lock_timeout = 0",
            "TRUNCATE TABLE #{truncated_tables.join(', ')} RESTRICT"
          ]

          sql_statements.each { |sql_statement| logger&.info(sql_statement) }

          next if dry_run

          connection.transaction do
            sql_statements.each { |sql_statement| connection.execute(sql_statement) }
          end
        end
      end

      def single_database_setup?
        return true unless Gitlab::Database.has_config?(:ci)

        ci_base_model = Gitlab::Database.database_base_models[:ci]
        !!Gitlab::Database.db_config_share_with(ci_base_model.connection_db_config)
      end
    end
  end
end