diff options
Diffstat (limited to 'lib/gitlab/database')
7 files changed, 727 insertions, 1 deletions
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index fc445ab9483..e76c9abbe04 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -26,6 +26,30 @@ module Gitlab add_index(table_name, column_name, options) end + # Removes an existed index, concurrently when supported + # + # On PostgreSQL this method removes an index concurrently. + # + # Example: + # + # remove_concurrent_index :users, :some_column + # + # See Rails' `remove_index` for more info on the available arguments. + def remove_concurrent_index(table_name, column_name, options = {}) + if transaction_open? + raise 'remove_concurrent_index can not be run inside a transaction, ' \ + 'you can disable transactions by calling disable_ddl_transaction! ' \ + 'in the body of your migration class' + end + + if Database.postgresql? + options = options.merge({ algorithm: :concurrently }) + disable_statement_timeout + end + + remove_index(table_name, options.merge({ column: column_name })) + end + # Adds a foreign key with only minimal locking on the tables involved. # # This method only requires minimal locking when using PostgreSQL. When @@ -65,7 +89,8 @@ module Gitlab ADD CONSTRAINT #{key_name} FOREIGN KEY (#{column}) REFERENCES #{target} (id) - ON DELETE #{on_delete} NOT VALID; + #{on_delete ? "ON DELETE #{on_delete}" : ''} + NOT VALID; EOF # Validate the existing constraint. This can potentially take a very @@ -90,6 +115,14 @@ module Gitlab execute('SET statement_timeout TO 0') if Database.postgresql? end + def true_value + Database.true_value + end + + def false_value + Database.false_value + end + # Updates the value of a column in batches. # # This method updates the table in batches of 5% of the total row count. @@ -226,6 +259,273 @@ module Gitlab raise error end end + + # Renames a column without requiring downtime. + # + # Concurrent renames work by using database triggers to ensure both the + # old and new column are in sync. However, this method will _not_ remove + # the triggers or the old column automatically; this needs to be done + # manually in a post-deployment migration. This can be done using the + # method `cleanup_concurrent_column_rename`. + # + # table - The name of the database table containing the column. + # old - The old column name. + # new - The new column name. + # type - The type of the new column. If no type is given the old column's + # type is used. + def rename_column_concurrently(table, old, new, type: nil) + if transaction_open? + raise 'rename_column_concurrently can not be run inside a transaction' + end + + old_col = column_for(table, old) + new_type = type || old_col.type + + add_column(table, new, new_type, + limit: old_col.limit, + precision: old_col.precision, + scale: old_col.scale) + + # We set the default value _after_ adding the column so we don't end up + # updating any existing data with the default value. This isn't + # necessary since we copy over old values further down. + change_column_default(table, new, old_col.default) if old_col.default + + trigger_name = rename_trigger_name(table, old, new) + quoted_table = quote_table_name(table) + quoted_old = quote_column_name(old) + quoted_new = quote_column_name(new) + + if Database.postgresql? + install_rename_triggers_for_postgresql(trigger_name, quoted_table, + quoted_old, quoted_new) + else + install_rename_triggers_for_mysql(trigger_name, quoted_table, + quoted_old, quoted_new) + end + + update_column_in_batches(table, new, Arel::Table.new(table)[old]) + + change_column_null(table, new, false) unless old_col.null + + copy_indexes(table, old, new) + copy_foreign_keys(table, old, new) + end + + # Changes the type of a column concurrently. + # + # table - The table containing the column. + # column - The name of the column to change. + # new_type - The new column type. + def change_column_type_concurrently(table, column, new_type) + temp_column = "#{column}_for_type_change" + + rename_column_concurrently(table, column, temp_column, type: new_type) + end + + # Performs cleanup of a concurrent type change. + # + # table - The table containing the column. + # column - The name of the column to change. + # new_type - The new column type. + def cleanup_concurrent_column_type_change(table, column) + temp_column = "#{column}_for_type_change" + + transaction do + # This has to be performed in a transaction as otherwise we might have + # inconsistent data. + cleanup_concurrent_column_rename(table, column, temp_column) + rename_column(table, temp_column, column) + end + end + + # Cleans up a concurrent column name. + # + # This method takes care of removing previously installed triggers as well + # as removing the old column. + # + # table - The name of the database table. + # old - The name of the old column. + # new - The name of the new column. + def cleanup_concurrent_column_rename(table, old, new) + trigger_name = rename_trigger_name(table, old, new) + + if Database.postgresql? + remove_rename_triggers_for_postgresql(table, trigger_name) + else + remove_rename_triggers_for_mysql(trigger_name) + end + + remove_column(table, old) + end + + # Performs a concurrent column rename when using PostgreSQL. + def install_rename_triggers_for_postgresql(trigger, table, old, new) + execute <<-EOF.strip_heredoc + CREATE OR REPLACE FUNCTION #{trigger}() + RETURNS trigger AS + $BODY$ + BEGIN + NEW.#{new} := NEW.#{old}; + RETURN NEW; + END; + $BODY$ + LANGUAGE 'plpgsql' + VOLATILE + EOF + + execute <<-EOF.strip_heredoc + CREATE TRIGGER #{trigger} + BEFORE INSERT OR UPDATE + ON #{table} + FOR EACH ROW + EXECUTE PROCEDURE #{trigger}() + EOF + end + + # Installs the triggers necessary to perform a concurrent column rename on + # MySQL. + def install_rename_triggers_for_mysql(trigger, table, old, new) + execute <<-EOF.strip_heredoc + CREATE TRIGGER #{trigger}_insert + BEFORE INSERT + ON #{table} + FOR EACH ROW + SET NEW.#{new} = NEW.#{old} + EOF + + execute <<-EOF.strip_heredoc + CREATE TRIGGER #{trigger}_update + BEFORE UPDATE + ON #{table} + FOR EACH ROW + SET NEW.#{new} = NEW.#{old} + EOF + end + + # Removes the triggers used for renaming a PostgreSQL column concurrently. + def remove_rename_triggers_for_postgresql(table, trigger) + execute("DROP TRIGGER #{trigger} ON #{table}") + execute("DROP FUNCTION #{trigger}()") + end + + # Removes the triggers used for renaming a MySQL column concurrently. + def remove_rename_triggers_for_mysql(trigger) + execute("DROP TRIGGER #{trigger}_insert") + execute("DROP TRIGGER #{trigger}_update") + end + + # Returns the (base) name to use for triggers when renaming columns. + def rename_trigger_name(table, old, new) + 'trigger_' + Digest::SHA256.hexdigest("#{table}_#{old}_#{new}").first(12) + end + + # Returns an Array containing the indexes for the given column + def indexes_for(table, column) + column = column.to_s + + indexes(table).select { |index| index.columns.include?(column) } + end + + # Returns an Array containing the foreign keys for the given column. + def foreign_keys_for(table, column) + column = column.to_s + + foreign_keys(table).select { |fk| fk.column == column } + end + + # Copies all indexes for the old column to a new column. + # + # table - The table containing the columns and indexes. + # old - The old column. + # new - The new column. + def copy_indexes(table, old, new) + old = old.to_s + new = new.to_s + + indexes_for(table, old).each do |index| + new_columns = index.columns.map do |column| + column == old ? new : column + end + + # This is necessary as we can't properly rename indexes such as + # "ci_taggings_idx". + unless index.name.include?(old) + raise "The index #{index.name} can not be copied as it does not "\ + "mention the old column. You have to rename this index manually first." + end + + name = index.name.gsub(old, new) + + options = { + unique: index.unique, + name: name, + length: index.lengths, + order: index.orders + } + + # These options are not supported by MySQL, so we only add them if + # they were previously set. + options[:using] = index.using if index.using + options[:where] = index.where if index.where + + unless index.opclasses.blank? + opclasses = index.opclasses.dup + + # Copy the operator classes for the old column (if any) to the new + # column. + opclasses[new] = opclasses.delete(old) if opclasses[old] + + options[:opclasses] = opclasses + end + + add_concurrent_index(table, new_columns, options) + end + end + + # Copies all foreign keys for the old column to the new column. + # + # table - The table containing the columns and indexes. + # old - The old column. + # new - The new column. + def copy_foreign_keys(table, old, new) + foreign_keys_for(table, old).each do |fk| + add_concurrent_foreign_key(fk.from_table, + fk.to_table, + column: new, + on_delete: fk.on_delete) + end + end + + # Returns the column for the given table and column name. + def column_for(table, name) + name = name.to_s + + columns(table).find { |column| column.name == name } + end + + # This will replace the first occurance of a string in a column with + # the replacement + # On postgresql we can use `regexp_replace` for that. + # On mysql we find the location of the pattern, and overwrite it + # with the replacement + def replace_sql(column, pattern, replacement) + quoted_pattern = Arel::Nodes::Quoted.new(pattern.to_s) + quoted_replacement = Arel::Nodes::Quoted.new(replacement.to_s) + + if Database.mysql? + locate = Arel::Nodes::NamedFunction. + new('locate', [quoted_pattern, column]) + insert_in_place = Arel::Nodes::NamedFunction. + new('insert', [column, locate, pattern.size, quoted_replacement]) + + Arel::Nodes::SqlLiteral.new(insert_in_place.to_sql) + else + replace = Arel::Nodes::NamedFunction. + new("regexp_replace", [column, quoted_pattern, quoted_replacement]) + Arel::Nodes::SqlLiteral.new(replace.to_sql) + end + end end end end diff --git a/lib/gitlab/database/multi_threaded_migration.rb b/lib/gitlab/database/multi_threaded_migration.rb new file mode 100644 index 00000000000..7ae5a4c17c8 --- /dev/null +++ b/lib/gitlab/database/multi_threaded_migration.rb @@ -0,0 +1,52 @@ +module Gitlab + module Database + module MultiThreadedMigration + MULTI_THREAD_AR_CONNECTION = :thread_local_ar_connection + + # This overwrites the default connection method so that every thread can + # use a thread-local connection, while still supporting all of Rails' + # migration methods. + def connection + Thread.current[MULTI_THREAD_AR_CONNECTION] || + ActiveRecord::Base.connection + end + + # Starts a thread-pool for N threads, along with N threads each using a + # single connection. The provided block is yielded from inside each + # thread. + # + # Example: + # + # with_multiple_threads(4) do + # execute('SELECT ...') + # end + # + # thread_count - The number of threads to start. + # + # join - When set to true this method will join the threads, blocking the + # caller until all threads have finished running. + # + # Returns an Array containing the started threads. + def with_multiple_threads(thread_count, join: true) + pool = Gitlab::Database.create_connection_pool(thread_count) + + threads = Array.new(thread_count) do + Thread.new do + pool.with_connection do |connection| + begin + Thread.current[MULTI_THREAD_AR_CONNECTION] = connection + yield + ensure + Thread.current[MULTI_THREAD_AR_CONNECTION] = nil + end + end + end + end + + threads.each(&:join) if join + + threads + end + end + end +end diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1.rb new file mode 100644 index 00000000000..89530082cd2 --- /dev/null +++ b/lib/gitlab/database/rename_reserved_paths_migration/v1.rb @@ -0,0 +1,35 @@ +# This module can be included in migrations to make it easier to rename paths +# of `Namespace` & `Project` models certain paths would become `reserved`. +# +# If the way things are stored on the filesystem related to namespaces and +# projects ever changes. Don't update this module, or anything nested in `V1`, +# since it needs to keep functioning for all migrations using it using the state +# that the data is in at the time. Instead, create a `V2` module that implements +# the new way of reserving paths. +module Gitlab + module Database + module RenameReservedPathsMigration + module V1 + def self.included(kls) + kls.include(MigrationHelpers) + end + + def rename_wildcard_paths(one_or_more_paths) + rename_child_paths(one_or_more_paths) + paths = Array(one_or_more_paths) + RenameProjects.new(paths, self).rename_projects + end + + def rename_child_paths(one_or_more_paths) + paths = Array(one_or_more_paths) + RenameNamespaces.new(paths, self).rename_namespaces(type: :child) + end + + def rename_root_paths(paths) + paths = Array(paths) + RenameNamespaces.new(paths, self).rename_namespaces(type: :top_level) + end + end + end + end +end diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1/migration_classes.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1/migration_classes.rb new file mode 100644 index 00000000000..5481024db8e --- /dev/null +++ b/lib/gitlab/database/rename_reserved_paths_migration/v1/migration_classes.rb @@ -0,0 +1,84 @@ +module Gitlab + module Database + module RenameReservedPathsMigration + module V1 + module MigrationClasses + module Routable + def full_path + if route && route.path.present? + @full_path ||= route.path + else + update_route if persisted? + + build_full_path + end + end + + def build_full_path + if parent && path + parent.full_path + '/' + path + else + path + end + end + + def update_route + prepare_route + route.save + end + + def prepare_route + route || build_route(source: self) + route.path = build_full_path + @full_path = nil + end + end + + class Namespace < ActiveRecord::Base + include MigrationClasses::Routable + self.table_name = 'namespaces' + belongs_to :parent, + class_name: "#{MigrationClasses.name}::Namespace" + has_one :route, as: :source + has_many :children, + class_name: "#{MigrationClasses.name}::Namespace", + foreign_key: :parent_id + + # Overridden to have the correct `source_type` for the `route` relation + def self.name + 'Namespace' + end + + def kind + type == 'Group' ? 'group' : 'user' + end + end + + class User < ActiveRecord::Base + self.table_name = 'users' + end + + class Route < ActiveRecord::Base + self.table_name = 'routes' + belongs_to :source, polymorphic: true + end + + class Project < ActiveRecord::Base + include MigrationClasses::Routable + has_one :route, as: :source + self.table_name = 'projects' + + def repository_storage_path + Gitlab.config.repositories.storages[repository_storage]['path'] + end + + # Overridden to have the correct `source_type` for the `route` relation + def self.name + 'Project' + end + end + end + end + end + end +end diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_base.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_base.rb new file mode 100644 index 00000000000..d60fd4bb551 --- /dev/null +++ b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_base.rb @@ -0,0 +1,132 @@ +module Gitlab + module Database + module RenameReservedPathsMigration + module V1 + class RenameBase + attr_reader :paths, :migration + + delegate :update_column_in_batches, + :replace_sql, + to: :migration + + def initialize(paths, migration) + @paths = paths + @migration = migration + end + + def path_patterns + @path_patterns ||= paths.flat_map { |path| ["%/#{path}", path] } + end + + def rename_path_for_routable(routable) + old_path = routable.path + old_full_path = routable.full_path + # Only remove the last occurrence of the path name to get the parent namespace path + namespace_path = remove_last_occurrence(old_full_path, old_path) + new_path = rename_path(namespace_path, old_path) + new_full_path = join_routable_path(namespace_path, new_path) + + # skips callbacks & validations + routable.class.where(id: routable). + update_all(path: new_path) + + rename_routes(old_full_path, new_full_path) + + [old_full_path, new_full_path] + end + + def rename_routes(old_full_path, new_full_path) + replace_statement = replace_sql(Route.arel_table[:path], + old_full_path, + new_full_path) + + update_column_in_batches(:routes, :path, replace_statement) do |table, query| + path_or_children = table[:path].matches_any([old_full_path, "#{old_full_path}/%"]) + query.where(path_or_children) + end + end + + def rename_path(namespace_path, path_was) + counter = 0 + path = "#{path_was}#{counter}" + + while route_exists?(join_routable_path(namespace_path, path)) + counter += 1 + path = "#{path_was}#{counter}" + end + + path + end + + def remove_last_occurrence(string, pattern) + string.reverse.sub(pattern.reverse, "").reverse + end + + def join_routable_path(namespace_path, top_level) + if namespace_path.present? + File.join(namespace_path, top_level) + else + top_level + end + end + + def route_exists?(full_path) + MigrationClasses::Route.where(Route.arel_table[:path].matches(full_path)).any? + end + + def move_pages(old_path, new_path) + move_folders(pages_dir, old_path, new_path) + end + + def move_uploads(old_path, new_path) + return unless file_storage? + + move_folders(uploads_dir, old_path, new_path) + end + + def move_folders(directory, old_relative_path, new_relative_path) + old_path = File.join(directory, old_relative_path) + return unless File.directory?(old_path) + + new_path = File.join(directory, new_relative_path) + FileUtils.mv(old_path, new_path) + end + + def remove_cached_html_for_projects(project_ids) + update_column_in_batches(:projects, :description_html, nil) do |table, query| + query.where(table[:id].in(project_ids)) + end + + update_column_in_batches(:issues, :description_html, nil) do |table, query| + query.where(table[:project_id].in(project_ids)) + end + + update_column_in_batches(:merge_requests, :description_html, nil) do |table, query| + query.where(table[:target_project_id].in(project_ids)) + end + + update_column_in_batches(:notes, :note_html, nil) do |table, query| + query.where(table[:project_id].in(project_ids)) + end + + update_column_in_batches(:milestones, :description_html, nil) do |table, query| + query.where(table[:project_id].in(project_ids)) + end + end + + def file_storage? + CarrierWave::Uploader::Base.storage == CarrierWave::Storage::File + end + + def uploads_dir + File.join(CarrierWave.root, "uploads") + end + + def pages_dir + Settings.pages.path + end + end + end + end + end +end diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb new file mode 100644 index 00000000000..2958ad4b8e5 --- /dev/null +++ b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_namespaces.rb @@ -0,0 +1,78 @@ +module Gitlab + module Database + module RenameReservedPathsMigration + module V1 + class RenameNamespaces < RenameBase + include Gitlab::ShellAdapter + + def rename_namespaces(type:) + namespaces_for_paths(type: type).each do |namespace| + rename_namespace(namespace) + end + end + + def namespaces_for_paths(type:) + namespaces = case type + when :child + MigrationClasses::Namespace.where.not(parent_id: nil) + when :top_level + MigrationClasses::Namespace.where(parent_id: nil) + end + with_paths = MigrationClasses::Route.arel_table[:path]. + matches_any(path_patterns) + namespaces.joins(:route).where(with_paths) + end + + def rename_namespace(namespace) + old_full_path, new_full_path = rename_path_for_routable(namespace) + + move_repositories(namespace, old_full_path, new_full_path) + move_uploads(old_full_path, new_full_path) + move_pages(old_full_path, new_full_path) + rename_user(old_full_path, new_full_path) if namespace.kind == 'user' + remove_cached_html_for_projects(projects_for_namespace(namespace).map(&:id)) + end + + def rename_user(old_username, new_username) + MigrationClasses::User.where(username: old_username) + .update_all(username: new_username) + end + + def move_repositories(namespace, old_full_path, new_full_path) + repo_paths_for_namespace(namespace).each do |repository_storage_path| + # Ensure old directory exists before moving it + gitlab_shell.add_namespace(repository_storage_path, old_full_path) + + unless gitlab_shell.mv_namespace(repository_storage_path, old_full_path, new_full_path) + message = "Exception moving path #{repository_storage_path} \ + from #{old_full_path} to #{new_full_path}" + Rails.logger.error message + end + end + end + + def repo_paths_for_namespace(namespace) + projects_for_namespace(namespace).distinct.select(:repository_storage). + map(&:repository_storage_path) + end + + def projects_for_namespace(namespace) + namespace_ids = child_ids_for_parent(namespace, ids: [namespace.id]) + namespace_or_children = MigrationClasses::Project. + arel_table[:namespace_id]. + in(namespace_ids) + MigrationClasses::Project.where(namespace_or_children) + end + + def child_ids_for_parent(namespace, ids: []) + namespace.children.each do |child| + ids << child.id + child_ids_for_parent(child, ids: ids) if child.children.any? + end + ids + end + end + end + end + end +end diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb new file mode 100644 index 00000000000..448717eb744 --- /dev/null +++ b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb @@ -0,0 +1,45 @@ +module Gitlab + module Database + module RenameReservedPathsMigration + module V1 + class RenameProjects < RenameBase + include Gitlab::ShellAdapter + + def rename_projects + projects_for_paths.each do |project| + rename_project(project) + end + + remove_cached_html_for_projects(projects_for_paths.map(&:id)) + end + + def rename_project(project) + old_full_path, new_full_path = rename_path_for_routable(project) + + move_repository(project, old_full_path, new_full_path) + move_repository(project, "#{old_full_path}.wiki", "#{new_full_path}.wiki") + move_uploads(old_full_path, new_full_path) + move_pages(old_full_path, new_full_path) + end + + def move_repository(project, old_path, new_path) + unless gitlab_shell.mv_repository(project.repository_storage_path, + old_path, + new_path) + Rails.logger.error "Error moving #{old_path} to #{new_path}" + end + end + + def projects_for_paths + return @projects_for_paths if @projects_for_paths + + with_paths = MigrationClasses::Route.arel_table[:path] + .matches_any(path_patterns) + + @projects_for_paths = MigrationClasses::Project.joins(:route).where(with_paths) + end + end + end + end + end +end |