summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorick Peterse <yorickpeterse@gmail.com>2016-12-06 17:31:58 +0100
committerYorick Peterse <yorickpeterse@gmail.com>2016-12-19 17:11:03 +0100
commitf73193c328b871a9a3af803012c10d9bc1bd0904 (patch)
tree016eafe1c321574b87b607325fd62e0f95779583
parenta50cd9eb4b4392004e47e57b2fa37c12def5827f (diff)
downloadgitlab-ce-project-authorizations-diff.tar.gz
Smarter refreshing of authorized projectsproject-authorizations-diff
Prior to this commit the refreshing of authorized projects was done in two steps: 1. Remove existing authorizations 2. Insert a new list of all authorizations This can lead to a high amount of dead tuples as every time all rows are being replaced. For example, if a user with 100 authorizations is given access to a new project this would lead to: * 100 rows being removed * 101 new rows being inserted This commit changes the way this system works so it only removes/inserts what is necessary. Using the above example this would lead to only 1 new row being inserted, with the initial 100 being left untouched. Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/25257
-rw-r--r--app/models/project_authorization.rb13
-rw-r--r--app/models/user.rb38
-rw-r--r--app/services/users/refresh_authorized_projects_service.rb128
-rw-r--r--app/workers/authorized_projects_worker.rb22
-rw-r--r--spec/models/project_authorization_spec.rb25
-rw-r--r--spec/services/users/refresh_authorized_projects_service_spec.rb185
-rw-r--r--spec/workers/authorized_projects_worker_spec.rb14
7 files changed, 364 insertions, 61 deletions
diff --git a/app/models/project_authorization.rb b/app/models/project_authorization.rb
index a00d43773d9..4c7f4f5a429 100644
--- a/app/models/project_authorization.rb
+++ b/app/models/project_authorization.rb
@@ -5,4 +5,17 @@ class ProjectAuthorization < ActiveRecord::Base
validates :project, presence: true
validates :access_level, inclusion: { in: Gitlab::Access.all_values }, presence: true
validates :user, uniqueness: { scope: [:project, :access_level] }, presence: true
+
+ def self.insert_authorizations(rows, per_batch = 1000)
+ rows.each_slice(per_batch) do |slice|
+ tuples = slice.map do |tuple|
+ tuple.map { |value| connection.quote(value) }
+ end
+
+ connection.execute <<-EOF.strip_heredoc
+ INSERT INTO project_authorizations (user_id, project_id, access_level)
+ VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')}
+ EOF
+ end
+ end
end
diff --git a/app/models/user.rb b/app/models/user.rb
index 3a17c98eff6..9f5cc149361 100644
--- a/app/models/user.rb
+++ b/app/models/user.rb
@@ -443,22 +443,16 @@ class User < ActiveRecord::Base
end
def refresh_authorized_projects
- transaction do
- project_authorizations.delete_all
-
- # project_authorizations_union can return multiple records for the same
- # project/user with different access_level so we take row with the maximum
- # access_level
- project_authorizations.connection.execute <<-SQL
- INSERT INTO project_authorizations (user_id, project_id, access_level)
- SELECT user_id, project_id, MAX(access_level) AS access_level
- FROM (#{project_authorizations_union.to_sql}) sub
- GROUP BY user_id, project_id
- SQL
-
- unless authorized_projects_populated
- update_column(:authorized_projects_populated, true)
- end
+ Users::RefreshAuthorizedProjectsService.new(self).execute
+ end
+
+ def remove_project_authorizations(project_ids)
+ project_authorizations.where(id: project_ids).delete_all
+ end
+
+ def set_authorized_projects_column
+ unless authorized_projects_populated
+ update_column(:authorized_projects_populated, true)
end
end
@@ -905,18 +899,6 @@ class User < ActiveRecord::Base
private
- # Returns a union query of projects that the user is authorized to access
- def project_authorizations_union
- relations = [
- personal_projects.select("#{id} AS user_id, projects.id AS project_id, #{Gitlab::Access::MASTER} AS access_level"),
- groups_projects.select_for_project_authorization,
- projects.select_for_project_authorization,
- groups.joins(:shared_projects).select_for_project_authorization
- ]
-
- Gitlab::SQL::Union.new(relations)
- end
-
def ci_projects_union
scope = { access_level: [Gitlab::Access::MASTER, Gitlab::Access::OWNER] }
groups = groups_projects.where(members: scope)
diff --git a/app/services/users/refresh_authorized_projects_service.rb b/app/services/users/refresh_authorized_projects_service.rb
new file mode 100644
index 00000000000..7d38ac3a374
--- /dev/null
+++ b/app/services/users/refresh_authorized_projects_service.rb
@@ -0,0 +1,128 @@
+module Users
+ # Service for refreshing the authorized projects of a user.
+ #
+ # This particular service class can not be used to update data for the same
+ # user concurrently. Doing so could lead to an incorrect state. To ensure this
+ # doesn't happen a caller must synchronize access (e.g. using
+ # `Gitlab::ExclusiveLease`).
+ #
+ # Usage:
+ #
+ # user = User.find_by(username: 'alice')
+ # service = Users::RefreshAuthorizedProjectsService.new(some_user)
+ # service.execute
+ class RefreshAuthorizedProjectsService
+ attr_reader :user
+
+ LEASE_TIMEOUT = 1.minute.to_i
+
+ # user - The User for which to refresh the authorized projects.
+ def initialize(user)
+ @user = user
+
+ # We need an up to date User object that has access to all relations that
+ # may have been created earlier. The only way to ensure this is to reload
+ # the User object.
+ user.reload
+ end
+
+ # This method returns the updated User object.
+ def execute
+ current = current_authorizations_per_project
+ fresh = fresh_access_levels_per_project
+
+ remove = current.each_with_object([]) do |(project_id, row), array|
+ # rows not in the new list or with a different access level should be
+ # removed.
+ if !fresh[project_id] || fresh[project_id] != row.access_level
+ array << row.id
+ end
+ end
+
+ add = fresh.each_with_object([]) do |(project_id, level), array|
+ # rows not in the old list or with a different access level should be
+ # added.
+ if !current[project_id] || current[project_id].access_level != level
+ array << [user.id, project_id, level]
+ end
+ end
+
+ update_with_lease(remove, add)
+ end
+
+ # Updates the list of authorizations using an exclusive lease.
+ def update_with_lease(remove = [], add = [])
+ lease_key = "refresh_authorized_projects:#{user.id}"
+ lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT)
+
+ until uuid = lease.try_obtain
+ # Keep trying until we obtain the lease. If we don't do so we may end up
+ # not updating the list of authorized projects properly. To prevent
+ # hammering Redis too much we'll wait for a bit between retries.
+ sleep(1)
+ end
+
+ begin
+ update_authorizations(remove, add)
+ ensure
+ Gitlab::ExclusiveLease.cancel(lease_key, uuid)
+ end
+ end
+
+ # Updates the list of authorizations for the current user.
+ #
+ # remove - The IDs of the authorization rows to remove.
+ # add - Rows to insert in the form `[user id, project id, access level]`
+ def update_authorizations(remove = [], add = [])
+ return if remove.empty? && add.empty?
+
+ User.transaction do
+ user.remove_project_authorizations(remove) unless remove.empty?
+ ProjectAuthorization.insert_authorizations(add) unless add.empty?
+ user.set_authorized_projects_column
+ end
+
+ # Since we batch insert authorization rows, Rails' associations may get
+ # out of sync. As such we force a reload of the User object.
+ user.reload
+ end
+
+ def fresh_access_levels_per_project
+ fresh_authorizations.each_with_object({}) do |row, hash|
+ hash[row.project_id] = row.access_level
+ end
+ end
+
+ def current_authorizations_per_project
+ current_authorizations.each_with_object({}) do |row, hash|
+ hash[row.project_id] = row
+ end
+ end
+
+ def current_authorizations
+ user.project_authorizations.select(:id, :project_id, :access_level)
+ end
+
+ def fresh_authorizations
+ ProjectAuthorization.
+ unscoped.
+ select('project_id, MAX(access_level) AS access_level').
+ from("(#{project_authorizations_union.to_sql}) #{ProjectAuthorization.table_name}").
+ group(:project_id)
+ end
+
+ private
+
+ # Returns a union query of projects that the user is authorized to access
+ def project_authorizations_union
+ relations = [
+ user.personal_projects.select("#{user.id} AS user_id, projects.id AS project_id, #{Gitlab::Access::MASTER} AS access_level"),
+ user.groups_projects.select_for_project_authorization,
+ user.projects.select_for_project_authorization,
+ user.groups.joins(:shared_projects).select_for_project_authorization
+ ]
+
+ Gitlab::SQL::Union.new(relations)
+ end
+ end
+end
diff --git a/app/workers/authorized_projects_worker.rb b/app/workers/authorized_projects_worker.rb
index fccddb70d18..2badd0680fb 100644
--- a/app/workers/authorized_projects_worker.rb
+++ b/app/workers/authorized_projects_worker.rb
@@ -2,8 +2,6 @@ class AuthorizedProjectsWorker
include Sidekiq::Worker
include DedicatedSidekiqQueue
- LEASE_TIMEOUT = 1.minute.to_i
-
def self.bulk_perform_async(args_list)
Sidekiq::Client.push_bulk('class' => self, 'args' => args_list)
end
@@ -11,24 +9,6 @@ class AuthorizedProjectsWorker
def perform(user_id)
user = User.find_by(id: user_id)
- refresh(user) if user
- end
-
- def refresh(user)
- lease_key = "refresh_authorized_projects:#{user.id}"
- lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT)
-
- until uuid = lease.try_obtain
- # Keep trying until we obtain the lease. If we don't do so we may end up
- # not updating the list of authorized projects properly. To prevent
- # hammering Redis too much we'll wait for a bit between retries.
- sleep(1)
- end
-
- begin
- user.refresh_authorized_projects
- ensure
- Gitlab::ExclusiveLease.cancel(lease_key, uuid)
- end
+ user.refresh_authorized_projects if user
end
end
diff --git a/spec/models/project_authorization_spec.rb b/spec/models/project_authorization_spec.rb
new file mode 100644
index 00000000000..33ef67f97a7
--- /dev/null
+++ b/spec/models/project_authorization_spec.rb
@@ -0,0 +1,25 @@
+require 'spec_helper'
+
+describe ProjectAuthorization do
+ let(:user) { create(:user) }
+ let(:project1) { create(:empty_project) }
+ let(:project2) { create(:empty_project) }
+
+ describe '.insert_authorizations' do
+ it 'inserts the authorizations' do
+ described_class.
+ insert_authorizations([[user.id, project1.id, Gitlab::Access::MASTER]])
+
+ expect(user.project_authorizations.count).to eq(1)
+ end
+
+ it 'inserts rows in batches' do
+ described_class.insert_authorizations([
+ [user.id, project1.id, Gitlab::Access::MASTER],
+ [user.id, project2.id, Gitlab::Access::MASTER],
+ ], 1)
+
+ expect(user.project_authorizations.count).to eq(2)
+ end
+ end
+end
diff --git a/spec/services/users/refresh_authorized_projects_service_spec.rb b/spec/services/users/refresh_authorized_projects_service_spec.rb
new file mode 100644
index 00000000000..72c8f7cd8ec
--- /dev/null
+++ b/spec/services/users/refresh_authorized_projects_service_spec.rb
@@ -0,0 +1,185 @@
+require 'spec_helper'
+
+describe Users::RefreshAuthorizedProjectsService do
+ let(:project) { create(:empty_project) }
+ let(:user) { project.namespace.owner }
+ let(:service) { described_class.new(user) }
+
+ def create_authorization(project, user, access_level = Gitlab::Access::MASTER)
+ ProjectAuthorization.
+ create!(project: project, user: user, access_level: access_level)
+ end
+
+ describe '#execute' do
+ before do
+ user.project_authorizations.delete_all
+ end
+
+ it 'updates the authorized projects of the user' do
+ project2 = create(:empty_project)
+ to_remove = create_authorization(project2, user)
+
+ expect(service).to receive(:update_with_lease).
+ with([to_remove.id], [[user.id, project.id, Gitlab::Access::MASTER]])
+
+ service.execute
+ end
+
+ it 'sets the access level of a project to the highest available level' do
+ to_remove = create_authorization(project, user, Gitlab::Access::DEVELOPER)
+
+ expect(service).to receive(:update_with_lease).
+ with([to_remove.id], [[user.id, project.id, Gitlab::Access::MASTER]])
+
+ service.execute
+ end
+
+ it 'returns a User' do
+ expect(service.execute).to be_an_instance_of(User)
+ end
+ end
+
+ describe '#update_with_lease', :redis do
+ it 'refreshes the authorizations using a lease' do
+ expect_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).
+ and_return('foo')
+
+ expect(Gitlab::ExclusiveLease).to receive(:cancel).
+ with(an_instance_of(String), 'foo')
+
+ expect(service).to receive(:update_authorizations).with([1], [])
+
+ service.update_with_lease([1])
+ end
+ end
+
+ describe '#update_authorizations' do
+ it 'does nothing when there are no rows to add and remove' do
+ expect(user).not_to receive(:remove_project_authorizations)
+ expect(ProjectAuthorization).not_to receive(:insert_authorizations)
+ expect(user).not_to receive(:set_authorized_projects_column)
+
+ service.update_authorizations([], [])
+ end
+
+ it 'removes authorizations that should be removed' do
+ authorization = create_authorization(project, user)
+
+ service.update_authorizations([authorization.id])
+
+ expect(user.project_authorizations).to be_empty
+ end
+
+ it 'inserts authorizations that should be added' do
+ service.update_authorizations([], [[user.id, project.id, Gitlab::Access::MASTER]])
+
+ authorizations = user.project_authorizations
+
+ expect(authorizations.length).to eq(1)
+ expect(authorizations[0].user_id).to eq(user.id)
+ expect(authorizations[0].project_id).to eq(project.id)
+ expect(authorizations[0].access_level).to eq(Gitlab::Access::MASTER)
+ end
+
+ it 'populates the authorized projects column' do
+ # make sure we start with a nil value no matter what the default in the
+ # factory may be.
+ user.update(authorized_projects_populated: nil)
+
+ service.update_authorizations([], [[user.id, project.id, Gitlab::Access::MASTER]])
+
+ expect(user.authorized_projects_populated).to eq(true)
+ end
+ end
+
+ describe '#fresh_access_levels_per_project' do
+ let(:hash) { service.fresh_access_levels_per_project }
+
+ it 'returns a Hash' do
+ expect(hash).to be_an_instance_of(Hash)
+ end
+
+ it 'sets the keys to the project IDs' do
+ expect(hash.keys).to eq([project.id])
+ end
+
+ it 'sets the values to the access levels' do
+ expect(hash.values).to eq([Gitlab::Access::MASTER])
+ end
+ end
+
+ describe '#current_authorizations_per_project' do
+ before { create_authorization(project, user) }
+
+ let(:hash) { service.current_authorizations_per_project }
+
+ it 'returns a Hash' do
+ expect(hash).to be_an_instance_of(Hash)
+ end
+
+ it 'sets the keys to the project IDs' do
+ expect(hash.keys).to eq([project.id])
+ end
+
+ it 'sets the values to the project authorization rows' do
+ expect(hash.values).to eq([ProjectAuthorization.first])
+ end
+ end
+
+ describe '#current_authorizations' do
+ context 'without authorizations' do
+ it 'returns an empty list' do
+ expect(service.current_authorizations.empty?).to eq(true)
+ end
+ end
+
+ context 'with an authorization' do
+ before { create_authorization(project, user) }
+
+ let(:row) { service.current_authorizations.take }
+
+ it 'returns the currently authorized projects' do
+ expect(service.current_authorizations.length).to eq(1)
+ end
+
+ it 'includes the row ID for every row' do
+ expect(row.id).to be_a_kind_of(Numeric)
+ end
+
+ it 'includes the project ID for every row' do
+ expect(row.project_id).to eq(project.id)
+ end
+
+ it 'includes the access level for every row' do
+ expect(row.access_level).to eq(Gitlab::Access::MASTER)
+ end
+ end
+ end
+
+ describe '#fresh_authorizations' do
+ it 'returns the new authorized projects' do
+ expect(service.fresh_authorizations.length).to eq(1)
+ end
+
+ it 'returns the highest access level' do
+ project.team.add_guest(user)
+
+ rows = service.fresh_authorizations.to_a
+
+ expect(rows.length).to eq(1)
+ expect(rows.first.access_level).to eq(Gitlab::Access::MASTER)
+ end
+
+ context 'every returned row' do
+ let(:row) { service.fresh_authorizations.take }
+
+ it 'includes the project ID' do
+ expect(row.project_id).to eq(project.id)
+ end
+
+ it 'includes the access level' do
+ expect(row.access_level).to eq(Gitlab::Access::MASTER)
+ end
+ end
+ end
+end
diff --git a/spec/workers/authorized_projects_worker_spec.rb b/spec/workers/authorized_projects_worker_spec.rb
index 95e2458da35..b6591f272f6 100644
--- a/spec/workers/authorized_projects_worker_spec.rb
+++ b/spec/workers/authorized_projects_worker_spec.rb
@@ -7,27 +7,17 @@ describe AuthorizedProjectsWorker do
it "refreshes user's authorized projects" do
user = create(:user)
- expect(worker).to receive(:refresh).with(an_instance_of(User))
+ expect_any_instance_of(User).to receive(:refresh_authorized_projects)
worker.perform(user.id)
end
context "when the user is not found" do
it "does nothing" do
- expect(worker).not_to receive(:refresh)
+ expect_any_instance_of(User).not_to receive(:refresh_authorized_projects)
described_class.new.perform(-1)
end
end
end
-
- describe '#refresh', redis: true do
- it 'refreshes the authorized projects of the user' do
- user = create(:user)
-
- expect(user).to receive(:refresh_authorized_projects)
-
- worker.refresh(user)
- end
- end
end