summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/gitlab/database/migration_helpers.rb132
-rw-r--r--spec/lib/gitlab/database/migration_helpers_spec.rb124
2 files changed, 256 insertions, 0 deletions
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
new file mode 100644
index 00000000000..7aa8c90e6d1
--- /dev/null
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -0,0 +1,132 @@
+module Gitlab
+ module Database
+ module MigrationHelpers
+ # Creates a new index, concurrently when supported
+ #
+ # On PostgreSQL this method creates an index concurrently, on MySQL this
+ # creates a regular index.
+ #
+ # Example:
+ #
+ # add_concurrent_index :users, :some_column
+ #
+ # See Rails' `add_index` for more info on the available arguments.
+ def add_concurrent_index(*args)
+ if transaction_open?
+ raise 'add_concurrent_index can not be run inside a transaction, ' \
+ 'you can disable transactions by calling disable_ddl_transaction! ' \
+ 'in the body of your migration class'
+ end
+
+ if Database.postgresql?
+ args << { algorithm: :concurrently }
+ end
+
+ add_index(*args)
+ end
+
+ # Updates the value of a column in batches.
+ #
+ # This method updates the table in batches of 5% of the total row count.
+ # Any data inserted while running this method (or after it has finished
+ # running) is _not_ updated automatically.
+ #
+ # This method _only_ updates rows where the column's value is set to NULL.
+ #
+ # table - The name of the table.
+ # column - The name of the column to update.
+ # value - The value for the column.
+ def update_column_in_batches(table, column, value)
+ quoted_table = quote_table_name(table)
+ quoted_column = quote_column_name(column)
+ quoted_value = quote(value)
+ processed = 0
+
+ total = exec_query("SELECT COUNT(*) AS count FROM #{quoted_table}").
+ to_hash.
+ first['count'].
+ to_i
+
+ # Update in batches of 5% with an upper limit of 5000 rows.
+ batch_size = ((total / 100.0) * 5.0).ceil
+
+ while processed < total
+ start_row = exec_query(%Q{
+ SELECT id
+ FROM #{quoted_table}
+ ORDER BY id ASC
+ LIMIT 1 OFFSET #{processed}
+ }).to_hash.first
+
+ stop_row = exec_query(%Q{
+ SELECT id
+ FROM #{quoted_table}
+ ORDER BY id ASC
+ LIMIT 1 OFFSET #{processed + batch_size}
+ }.strip_heredoc).to_hash.first
+
+ query = %Q{
+ UPDATE #{quoted_table}
+ SET #{quoted_column} = #{quoted_value}
+ WHERE id >= #{start_row['id']}
+ }
+
+ if stop_row
+ query += " AND id < #{stop_row['id']}"
+ end
+
+ execute(query.strip_heredoc)
+
+ processed += batch_size
+ end
+ end
+
+ # Adds a column with a default value without locking an entire table.
+ #
+ # This method runs the following steps:
+ #
+ # 1. Add the column with a default value of NULL.
+ # 2. Update all existing rows in batches.
+ # 3. Change the default value of the column to the specified value.
+ # 4. Update any remaining rows.
+ #
+ # These steps ensure a column can be added to a large and commonly used
+ # table without locking the entire table for the duration of the table
+ # modification.
+ #
+ # table - The name of the table to update.
+ # column - The name of the column to add.
+ # type - The column type (e.g. `:integer`).
+ # default - The default value for the column.
+ # allow_null - When set to `true` the column will allow NULL values, the
+ # default is to not allow NULL values.
+ def add_column_with_default(table, column, type, default:, allow_null: false)
+ if transaction_open?
+ raise 'add_column_with_default can not be run inside a transaction, ' \
+ 'you can disable transactions by calling disable_ddl_transaction! ' \
+ 'in the body of your migration class'
+ end
+
+ transaction do
+ add_column(table, column, type, default: nil)
+
+ # Changing the default before the update ensures any newly inserted
+ # rows already use the proper default value.
+ change_column_default(table, column, default)
+ end
+
+ begin
+ transaction do
+ update_column_in_batches(table, column, default)
+ end
+ rescue Exception => error
+ remove_column(table, column)
+
+ raise error
+ end
+
+ change_column_null(table, column, false) unless allow_null
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb
new file mode 100644
index 00000000000..ad2f3cb7e45
--- /dev/null
+++ b/spec/lib/gitlab/database/migration_helpers_spec.rb
@@ -0,0 +1,124 @@
+require 'spec_helper'
+
+describe Gitlab::Database::MigrationHelpers, lib: true do
+ let(:model) do
+ Class.new do
+ include Gitlab::Database::MigrationHelpers
+
+ def method_missing(name, *args, &block)
+ ActiveRecord::Base.connection.send(name, *args, &block)
+ end
+ end.new
+ end
+
+ describe '#add_concurrent_index' do
+ context 'outside a transaction' do
+ before do
+ expect(model).to receive(:transaction_open?).and_return(false)
+ end
+
+ context 'using PostgreSQL' do
+ it 'creates the index concurrently' do
+ expect(Gitlab::Database).to receive(:postgresql?).and_return(true)
+
+ expect(model).to receive(:add_index).
+ with(:users, :foo, algorithm: :concurrently)
+
+ model.add_concurrent_index(:users, :foo)
+ end
+ end
+
+ context 'using MySQL' do
+ it 'creates a regular index' do
+ expect(Gitlab::Database).to receive(:postgresql?).and_return(false)
+
+ expect(model).to receive(:add_index).
+ with(:users, :foo)
+
+ model.add_concurrent_index(:users, :foo)
+ end
+ end
+ end
+
+ context 'inside a transaction' do
+ it 'raises RuntimeError' do
+ expect(model).to receive(:transaction_open?).and_return(true)
+
+ expect { model.add_concurrent_index(:users, :foo) }.
+ to raise_error(RuntimeError)
+ end
+ end
+ end
+
+ describe '#update_column_in_batches' do
+ before do
+ create_list(:empty_project, 5)
+ end
+
+ it 'updates all the rows in a table' do
+ model.update_column_in_batches(:projects, :import_error, 'foo')
+
+ expect(Project.where(import_error: 'foo').count).to eq(5)
+ end
+ end
+
+ describe '#add_column_with_default' do
+ context 'outside of a transaction' do
+ before do
+ expect(model).to receive(:transaction_open?).and_return(false)
+
+ expect(model).to receive(:transaction).twice.and_yield
+
+ expect(model).to receive(:add_column).
+ with(:projects, :foo, :integer, default: nil)
+
+ expect(model).to receive(:change_column_default).
+ with(:projects, :foo, 10)
+ end
+
+ it 'adds the column while allowing NULL values' do
+ expect(model).to receive(:update_column_in_batches).
+ with(:projects, :foo, 10)
+
+ expect(model).not_to receive(:change_column_null)
+
+ model.add_column_with_default(:projects, :foo, :integer,
+ default: 10,
+ allow_null: true)
+ end
+
+ it 'adds the column while not allowing NULL values' do
+ expect(model).to receive(:update_column_in_batches).
+ with(:projects, :foo, 10)
+
+ expect(model).to receive(:change_column_null).
+ with(:projects, :foo, false)
+
+ model.add_column_with_default(:projects, :foo, :integer, default: 10)
+ end
+
+ it 'removes the added column whenever updating the rows fails' do
+ expect(model).to receive(:update_column_in_batches).
+ with(:projects, :foo, 10).
+ and_raise(RuntimeError)
+
+ expect(model).to receive(:remove_column).
+ with(:projects, :foo)
+
+ expect {
+ model.add_column_with_default(:projects, :foo, :integer, default: 10)
+ }.to raise_error(RuntimeError)
+ end
+ end
+
+ context 'inside a transaction' do
+ it 'raises RuntimeError' do
+ expect(model).to receive(:transaction_open?).and_return(true)
+
+ expect {
+ model.add_column_with_default(:projects, :foo, :integer, default: 10)
+ }.to raise_error(RuntimeError)
+ end
+ end
+ end
+end