diff options
author | Nick Thomas <nick@gitlab.com> | 2016-12-20 13:16:07 +0000 |
---|---|---|
committer | Nick Thomas <nick@gitlab.com> | 2016-12-20 13:16:07 +0000 |
commit | 1139da2745eb28be17c8fd6aa0f5de113c0f06a9 (patch) | |
tree | cccbae8b3a7d46cc9d673b114cae143c8f70b5c5 | |
parent | a85220506eb3a7516008a1ffe67beff9550cdd1c (diff) | |
parent | f73193c328b871a9a3af803012c10d9bc1bd0904 (diff) | |
download | gitlab-ce-1139da2745eb28be17c8fd6aa0f5de113c0f06a9.tar.gz |
Merge branch 'project-authorizations-diff' into 'master'
Smarter refreshing of authorized projects
This MR reworks the way `User#refresh_authorized_projects`, resulting in a much more efficient process leading to fewer dead tuples.
Related issue: https://gitlab.com/gitlab-org/gitlab-ce/issues/25257
See merge request !7956
-rw-r--r-- | app/models/project_authorization.rb | 13 | ||||
-rw-r--r-- | app/models/user.rb | 38 | ||||
-rw-r--r-- | app/services/users/refresh_authorized_projects_service.rb | 128 | ||||
-rw-r--r-- | app/workers/authorized_projects_worker.rb | 22 | ||||
-rw-r--r-- | spec/models/project_authorization_spec.rb | 25 | ||||
-rw-r--r-- | spec/services/users/refresh_authorized_projects_service_spec.rb | 185 | ||||
-rw-r--r-- | spec/workers/authorized_projects_worker_spec.rb | 14 |
7 files changed, 364 insertions, 61 deletions
diff --git a/app/models/project_authorization.rb b/app/models/project_authorization.rb index a00d43773d9..4c7f4f5a429 100644 --- a/app/models/project_authorization.rb +++ b/app/models/project_authorization.rb @@ -5,4 +5,17 @@ class ProjectAuthorization < ActiveRecord::Base validates :project, presence: true validates :access_level, inclusion: { in: Gitlab::Access.all_values }, presence: true validates :user, uniqueness: { scope: [:project, :access_level] }, presence: true + + def self.insert_authorizations(rows, per_batch = 1000) + rows.each_slice(per_batch) do |slice| + tuples = slice.map do |tuple| + tuple.map { |value| connection.quote(value) } + end + + connection.execute <<-EOF.strip_heredoc + INSERT INTO project_authorizations (user_id, project_id, access_level) + VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')} + EOF + end + end end diff --git a/app/models/user.rb b/app/models/user.rb index d771eaff472..899a89a4eaa 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -439,22 +439,16 @@ class User < ActiveRecord::Base end def refresh_authorized_projects - transaction do - project_authorizations.delete_all - - # project_authorizations_union can return multiple records for the same - # project/user with different access_level so we take row with the maximum - # access_level - project_authorizations.connection.execute <<-SQL - INSERT INTO project_authorizations (user_id, project_id, access_level) - SELECT user_id, project_id, MAX(access_level) AS access_level - FROM (#{project_authorizations_union.to_sql}) sub - GROUP BY user_id, project_id - SQL - - unless authorized_projects_populated - update_column(:authorized_projects_populated, true) - end + Users::RefreshAuthorizedProjectsService.new(self).execute + end + + def remove_project_authorizations(project_ids) + project_authorizations.where(id: project_ids).delete_all + end + + def set_authorized_projects_column + unless authorized_projects_populated + update_column(:authorized_projects_populated, true) end end @@ -901,18 +895,6 @@ class User < ActiveRecord::Base private - # Returns a union query of projects that the user is authorized to access - def project_authorizations_union - relations = [ - personal_projects.select("#{id} AS user_id, projects.id AS project_id, #{Gitlab::Access::MASTER} AS access_level"), - groups_projects.select_for_project_authorization, - projects.select_for_project_authorization, - groups.joins(:shared_projects).select_for_project_authorization - ] - - Gitlab::SQL::Union.new(relations) - end - def ci_projects_union scope = { access_level: [Gitlab::Access::MASTER, Gitlab::Access::OWNER] } groups = groups_projects.where(members: scope) diff --git a/app/services/users/refresh_authorized_projects_service.rb b/app/services/users/refresh_authorized_projects_service.rb new file mode 100644 index 00000000000..7d38ac3a374 --- /dev/null +++ b/app/services/users/refresh_authorized_projects_service.rb @@ -0,0 +1,128 @@ +module Users + # Service for refreshing the authorized projects of a user. + # + # This particular service class can not be used to update data for the same + # user concurrently. Doing so could lead to an incorrect state. To ensure this + # doesn't happen a caller must synchronize access (e.g. using + # `Gitlab::ExclusiveLease`). + # + # Usage: + # + # user = User.find_by(username: 'alice') + # service = Users::RefreshAuthorizedProjectsService.new(some_user) + # service.execute + class RefreshAuthorizedProjectsService + attr_reader :user + + LEASE_TIMEOUT = 1.minute.to_i + + # user - The User for which to refresh the authorized projects. + def initialize(user) + @user = user + + # We need an up to date User object that has access to all relations that + # may have been created earlier. The only way to ensure this is to reload + # the User object. + user.reload + end + + # This method returns the updated User object. + def execute + current = current_authorizations_per_project + fresh = fresh_access_levels_per_project + + remove = current.each_with_object([]) do |(project_id, row), array| + # rows not in the new list or with a different access level should be + # removed. + if !fresh[project_id] || fresh[project_id] != row.access_level + array << row.id + end + end + + add = fresh.each_with_object([]) do |(project_id, level), array| + # rows not in the old list or with a different access level should be + # added. + if !current[project_id] || current[project_id].access_level != level + array << [user.id, project_id, level] + end + end + + update_with_lease(remove, add) + end + + # Updates the list of authorizations using an exclusive lease. + def update_with_lease(remove = [], add = []) + lease_key = "refresh_authorized_projects:#{user.id}" + lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT) + + until uuid = lease.try_obtain + # Keep trying until we obtain the lease. If we don't do so we may end up + # not updating the list of authorized projects properly. To prevent + # hammering Redis too much we'll wait for a bit between retries. + sleep(1) + end + + begin + update_authorizations(remove, add) + ensure + Gitlab::ExclusiveLease.cancel(lease_key, uuid) + end + end + + # Updates the list of authorizations for the current user. + # + # remove - The IDs of the authorization rows to remove. + # add - Rows to insert in the form `[user id, project id, access level]` + def update_authorizations(remove = [], add = []) + return if remove.empty? && add.empty? + + User.transaction do + user.remove_project_authorizations(remove) unless remove.empty? + ProjectAuthorization.insert_authorizations(add) unless add.empty? + user.set_authorized_projects_column + end + + # Since we batch insert authorization rows, Rails' associations may get + # out of sync. As such we force a reload of the User object. + user.reload + end + + def fresh_access_levels_per_project + fresh_authorizations.each_with_object({}) do |row, hash| + hash[row.project_id] = row.access_level + end + end + + def current_authorizations_per_project + current_authorizations.each_with_object({}) do |row, hash| + hash[row.project_id] = row + end + end + + def current_authorizations + user.project_authorizations.select(:id, :project_id, :access_level) + end + + def fresh_authorizations + ProjectAuthorization. + unscoped. + select('project_id, MAX(access_level) AS access_level'). + from("(#{project_authorizations_union.to_sql}) #{ProjectAuthorization.table_name}"). + group(:project_id) + end + + private + + # Returns a union query of projects that the user is authorized to access + def project_authorizations_union + relations = [ + user.personal_projects.select("#{user.id} AS user_id, projects.id AS project_id, #{Gitlab::Access::MASTER} AS access_level"), + user.groups_projects.select_for_project_authorization, + user.projects.select_for_project_authorization, + user.groups.joins(:shared_projects).select_for_project_authorization + ] + + Gitlab::SQL::Union.new(relations) + end + end +end diff --git a/app/workers/authorized_projects_worker.rb b/app/workers/authorized_projects_worker.rb index fccddb70d18..2badd0680fb 100644 --- a/app/workers/authorized_projects_worker.rb +++ b/app/workers/authorized_projects_worker.rb @@ -2,8 +2,6 @@ class AuthorizedProjectsWorker include Sidekiq::Worker include DedicatedSidekiqQueue - LEASE_TIMEOUT = 1.minute.to_i - def self.bulk_perform_async(args_list) Sidekiq::Client.push_bulk('class' => self, 'args' => args_list) end @@ -11,24 +9,6 @@ class AuthorizedProjectsWorker def perform(user_id) user = User.find_by(id: user_id) - refresh(user) if user - end - - def refresh(user) - lease_key = "refresh_authorized_projects:#{user.id}" - lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT) - - until uuid = lease.try_obtain - # Keep trying until we obtain the lease. If we don't do so we may end up - # not updating the list of authorized projects properly. To prevent - # hammering Redis too much we'll wait for a bit between retries. - sleep(1) - end - - begin - user.refresh_authorized_projects - ensure - Gitlab::ExclusiveLease.cancel(lease_key, uuid) - end + user.refresh_authorized_projects if user end end diff --git a/spec/models/project_authorization_spec.rb b/spec/models/project_authorization_spec.rb new file mode 100644 index 00000000000..33ef67f97a7 --- /dev/null +++ b/spec/models/project_authorization_spec.rb @@ -0,0 +1,25 @@ +require 'spec_helper' + +describe ProjectAuthorization do + let(:user) { create(:user) } + let(:project1) { create(:empty_project) } + let(:project2) { create(:empty_project) } + + describe '.insert_authorizations' do + it 'inserts the authorizations' do + described_class. + insert_authorizations([[user.id, project1.id, Gitlab::Access::MASTER]]) + + expect(user.project_authorizations.count).to eq(1) + end + + it 'inserts rows in batches' do + described_class.insert_authorizations([ + [user.id, project1.id, Gitlab::Access::MASTER], + [user.id, project2.id, Gitlab::Access::MASTER], + ], 1) + + expect(user.project_authorizations.count).to eq(2) + end + end +end diff --git a/spec/services/users/refresh_authorized_projects_service_spec.rb b/spec/services/users/refresh_authorized_projects_service_spec.rb new file mode 100644 index 00000000000..72c8f7cd8ec --- /dev/null +++ b/spec/services/users/refresh_authorized_projects_service_spec.rb @@ -0,0 +1,185 @@ +require 'spec_helper' + +describe Users::RefreshAuthorizedProjectsService do + let(:project) { create(:empty_project) } + let(:user) { project.namespace.owner } + let(:service) { described_class.new(user) } + + def create_authorization(project, user, access_level = Gitlab::Access::MASTER) + ProjectAuthorization. + create!(project: project, user: user, access_level: access_level) + end + + describe '#execute' do + before do + user.project_authorizations.delete_all + end + + it 'updates the authorized projects of the user' do + project2 = create(:empty_project) + to_remove = create_authorization(project2, user) + + expect(service).to receive(:update_with_lease). + with([to_remove.id], [[user.id, project.id, Gitlab::Access::MASTER]]) + + service.execute + end + + it 'sets the access level of a project to the highest available level' do + to_remove = create_authorization(project, user, Gitlab::Access::DEVELOPER) + + expect(service).to receive(:update_with_lease). + with([to_remove.id], [[user.id, project.id, Gitlab::Access::MASTER]]) + + service.execute + end + + it 'returns a User' do + expect(service.execute).to be_an_instance_of(User) + end + end + + describe '#update_with_lease', :redis do + it 'refreshes the authorizations using a lease' do + expect_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain). + and_return('foo') + + expect(Gitlab::ExclusiveLease).to receive(:cancel). + with(an_instance_of(String), 'foo') + + expect(service).to receive(:update_authorizations).with([1], []) + + service.update_with_lease([1]) + end + end + + describe '#update_authorizations' do + it 'does nothing when there are no rows to add and remove' do + expect(user).not_to receive(:remove_project_authorizations) + expect(ProjectAuthorization).not_to receive(:insert_authorizations) + expect(user).not_to receive(:set_authorized_projects_column) + + service.update_authorizations([], []) + end + + it 'removes authorizations that should be removed' do + authorization = create_authorization(project, user) + + service.update_authorizations([authorization.id]) + + expect(user.project_authorizations).to be_empty + end + + it 'inserts authorizations that should be added' do + service.update_authorizations([], [[user.id, project.id, Gitlab::Access::MASTER]]) + + authorizations = user.project_authorizations + + expect(authorizations.length).to eq(1) + expect(authorizations[0].user_id).to eq(user.id) + expect(authorizations[0].project_id).to eq(project.id) + expect(authorizations[0].access_level).to eq(Gitlab::Access::MASTER) + end + + it 'populates the authorized projects column' do + # make sure we start with a nil value no matter what the default in the + # factory may be. + user.update(authorized_projects_populated: nil) + + service.update_authorizations([], [[user.id, project.id, Gitlab::Access::MASTER]]) + + expect(user.authorized_projects_populated).to eq(true) + end + end + + describe '#fresh_access_levels_per_project' do + let(:hash) { service.fresh_access_levels_per_project } + + it 'returns a Hash' do + expect(hash).to be_an_instance_of(Hash) + end + + it 'sets the keys to the project IDs' do + expect(hash.keys).to eq([project.id]) + end + + it 'sets the values to the access levels' do + expect(hash.values).to eq([Gitlab::Access::MASTER]) + end + end + + describe '#current_authorizations_per_project' do + before { create_authorization(project, user) } + + let(:hash) { service.current_authorizations_per_project } + + it 'returns a Hash' do + expect(hash).to be_an_instance_of(Hash) + end + + it 'sets the keys to the project IDs' do + expect(hash.keys).to eq([project.id]) + end + + it 'sets the values to the project authorization rows' do + expect(hash.values).to eq([ProjectAuthorization.first]) + end + end + + describe '#current_authorizations' do + context 'without authorizations' do + it 'returns an empty list' do + expect(service.current_authorizations.empty?).to eq(true) + end + end + + context 'with an authorization' do + before { create_authorization(project, user) } + + let(:row) { service.current_authorizations.take } + + it 'returns the currently authorized projects' do + expect(service.current_authorizations.length).to eq(1) + end + + it 'includes the row ID for every row' do + expect(row.id).to be_a_kind_of(Numeric) + end + + it 'includes the project ID for every row' do + expect(row.project_id).to eq(project.id) + end + + it 'includes the access level for every row' do + expect(row.access_level).to eq(Gitlab::Access::MASTER) + end + end + end + + describe '#fresh_authorizations' do + it 'returns the new authorized projects' do + expect(service.fresh_authorizations.length).to eq(1) + end + + it 'returns the highest access level' do + project.team.add_guest(user) + + rows = service.fresh_authorizations.to_a + + expect(rows.length).to eq(1) + expect(rows.first.access_level).to eq(Gitlab::Access::MASTER) + end + + context 'every returned row' do + let(:row) { service.fresh_authorizations.take } + + it 'includes the project ID' do + expect(row.project_id).to eq(project.id) + end + + it 'includes the access level' do + expect(row.access_level).to eq(Gitlab::Access::MASTER) + end + end + end +end diff --git a/spec/workers/authorized_projects_worker_spec.rb b/spec/workers/authorized_projects_worker_spec.rb index 95e2458da35..b6591f272f6 100644 --- a/spec/workers/authorized_projects_worker_spec.rb +++ b/spec/workers/authorized_projects_worker_spec.rb @@ -7,27 +7,17 @@ describe AuthorizedProjectsWorker do it "refreshes user's authorized projects" do user = create(:user) - expect(worker).to receive(:refresh).with(an_instance_of(User)) + expect_any_instance_of(User).to receive(:refresh_authorized_projects) worker.perform(user.id) end context "when the user is not found" do it "does nothing" do - expect(worker).not_to receive(:refresh) + expect_any_instance_of(User).not_to receive(:refresh_authorized_projects) described_class.new.perform(-1) end end end - - describe '#refresh', redis: true do - it 'refreshes the authorized projects of the user' do - user = create(:user) - - expect(user).to receive(:refresh_authorized_projects) - - worker.refresh(user) - end - end end |