diff options
author | Douwe Maan <douwe@gitlab.com> | 2017-05-29 15:49:56 +0000 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2017-05-29 15:49:56 +0000 |
commit | 26bcef97d64f449b5073ac767c02961763c20b18 (patch) | |
tree | 4a0ba9af1b670ee1c2743ff37988799142c33ff5 /db | |
parent | 7dc8961af99d2a30b0a3210f7a4ee43c8779c6d2 (diff) | |
parent | 27d5f99e508024b5c2fb8509f83e8e4c6a214865 (diff) | |
download | gitlab-ce-26bcef97d64f449b5073ac767c02961763c20b18.tar.gz |
Merge branch 'rework-authorizations-performance' into 'master'
Rework project authorizations and nested groups for better performance
See merge request !10885
Diffstat (limited to 'db')
5 files changed, 203 insertions, 1 deletions
diff --git a/db/migrate/20170503140201_reschedule_project_authorizations.rb b/db/migrate/20170503140201_reschedule_project_authorizations.rb new file mode 100644 index 00000000000..fa45adadbae --- /dev/null +++ b/db/migrate/20170503140201_reschedule_project_authorizations.rb @@ -0,0 +1,44 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class RescheduleProjectAuthorizations < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + class User < ActiveRecord::Base + self.table_name = 'users' + end + + def up + offset = 0 + batch = 5000 + start = Time.now + + loop do + relation = User.where('id > ?', offset) + user_ids = relation.limit(batch).reorder(id: :asc).pluck(:id) + + break if user_ids.empty? + + offset = user_ids.last + + # This will schedule each batch 5 minutes after the previous batch was + # scheduled. This smears out the load over time, instead of immediately + # scheduling a million jobs. + Sidekiq::Client.push_bulk( + 'queue' => 'authorized_projects', + 'args' => user_ids.zip, + 'class' => 'AuthorizedProjectsWorker', + 'at' => start.to_i + ) + + start += 5.minutes + end + end + + def down + end +end diff --git a/db/migrate/20170503140202_turn_nested_groups_into_regular_groups_for_mysql.rb b/db/migrate/20170503140202_turn_nested_groups_into_regular_groups_for_mysql.rb new file mode 100644 index 00000000000..c67690642c9 --- /dev/null +++ b/db/migrate/20170503140202_turn_nested_groups_into_regular_groups_for_mysql.rb @@ -0,0 +1,123 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +# This migration depends on code external to it. For example, it relies on +# updating a namespace to also rename directories (uploads, GitLab pages, etc). +# The alternative is to copy hundreds of lines of code into this migration, +# adjust them where needed, etc; something which doesn't work well at all. +class TurnNestedGroupsIntoRegularGroupsForMysql < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + def run_migration? + Gitlab::Database.mysql? + end + + def up + return unless run_migration? + + # For all sub-groups we need to give the right people access. We do this as + # follows: + # + # 1. Get all the ancestors for the current namespace + # 2. Get all the members of these namespaces, along with their higher access + # level + # 3. Give these members access to the current namespace + Namespace.unscoped.where('parent_id IS NOT NULL').find_each do |namespace| + rows = [] + existing = namespace.members.pluck(:user_id) + + all_members_for(namespace).each do |member| + next if existing.include?(member[:user_id]) + + rows << { + access_level: member[:access_level], + source_id: namespace.id, + source_type: 'Namespace', + user_id: member[:user_id], + notification_level: 3, # global + type: 'GroupMember', + created_at: Time.current, + updated_at: Time.current + } + end + + bulk_insert_members(rows) + + # This method relies on the parent to determine the proper path. + # Because we reset "parent_id" this method will not return the right path + # when moving namespaces. + full_path_was = namespace.send(:full_path_was) + + namespace.define_singleton_method(:full_path_was) { full_path_was } + + namespace.update!(parent_id: nil, path: new_path_for(namespace)) + end + end + + def down + # There is no way to go back from regular groups to nested groups. + end + + # Generates a new (unique) path for a namespace. + def new_path_for(namespace) + counter = 1 + base = namespace.full_path.tr('/', '-') + new_path = base + + while Namespace.unscoped.where(path: new_path).exists? + new_path = base + "-#{counter}" + counter += 1 + end + + new_path + end + + # Returns an Array containing all the ancestors of the current namespace. + # + # This method is not particularly efficient, but it's probably still faster + # than using the "routes" table. Most importantly of all, it _only_ depends + # on the namespaces table and the "parent_id" column. + def ancestors_for(namespace) + ancestors = [] + current = namespace + + while current&.parent_id + # We're using find_by(id: ...) here to deal with cases where the + # parent_id may point to a missing row. + current = Namespace.unscoped.select([:id, :parent_id]). + find_by(id: current.parent_id) + + ancestors << current.id if current + end + + ancestors + end + + # Returns a relation containing all the members that have access to any of + # the current namespace's parent namespaces. + def all_members_for(namespace) + Member. + unscoped. + select(['user_id', 'MAX(access_level) AS access_level']). + where(source_type: 'Namespace', source_id: ancestors_for(namespace)). + group(:user_id) + end + + def bulk_insert_members(rows) + return if rows.empty? + + keys = rows.first.keys + + tuples = rows.map do |row| + row.map { |(_, value)| connection.quote(value) } + end + + execute <<-EOF.strip_heredoc + INSERT INTO members (#{keys.join(', ')}) + VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')} + EOF + end +end diff --git a/db/migrate/20170504182103_add_index_project_group_links_group_id.rb b/db/migrate/20170504182103_add_index_project_group_links_group_id.rb new file mode 100644 index 00000000000..62bf641daa6 --- /dev/null +++ b/db/migrate/20170504182103_add_index_project_group_links_group_id.rb @@ -0,0 +1,19 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddIndexProjectGroupLinksGroupId < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + def up + add_concurrent_index :project_group_links, :group_id + end + + def down + remove_concurrent_index :project_group_links, :group_id + end +end diff --git a/db/post_migrate/20170503120310_remove_users_authorized_projects_populated.rb b/db/post_migrate/20170503120310_remove_users_authorized_projects_populated.rb new file mode 100644 index 00000000000..1b44334395f --- /dev/null +++ b/db/post_migrate/20170503120310_remove_users_authorized_projects_populated.rb @@ -0,0 +1,15 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class RemoveUsersAuthorizedProjectsPopulated < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + def change + remove_column :users, :authorized_projects_populated, :boolean + end +end diff --git a/db/schema.rb b/db/schema.rb index f6b513a5725..4c73f74ef1f 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -928,6 +928,8 @@ ActiveRecord::Schema.define(version: 20170523091700) do t.date "expires_at" end + add_index "project_group_links", ["group_id"], name: "index_project_group_links_on_group_id", using: :btree + create_table "project_import_data", force: :cascade do |t| t.integer "project_id" t.text "data" @@ -1355,7 +1357,6 @@ ActiveRecord::Schema.define(version: 20170523091700) do t.boolean "external", default: false t.string "incoming_email_token" t.string "organization" - t.boolean "authorized_projects_populated" t.boolean "require_two_factor_authentication_from_group", default: false, null: false t.integer "two_factor_grace_period", default: 48, null: false t.boolean "ghost" |