summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Hu <stanhu@gmail.com>2019-08-15 14:57:38 -0700
committerStan Hu <stanhu@gmail.com>2019-08-16 10:30:56 -0700
commit40e0df4e0690f65be683d53fdd5056af6faace9a (patch)
tree730a4e83443c0cc6d2c23da3398a74d4afa9d88c
parent2b48eec8667006fc05061d90493b50fc052243c9 (diff)
downloadgitlab-ce-sh-post-receive-cache-clear-once.tar.gz
Expire project caches once per push instead of once per refsh-post-receive-cache-clear-once
Previously `ProjectCacheWorker` would be scheduled once per ref, which would generate unnecessary I/O and load on Sidekiq, especially if many tags or branches were pushed at once. `ProjectCacheWorker` would expire three items: 1. Repository size: This only needs to be updated once per push. 2. Commit count: This only needs to be updated if the default branch is updated. 3. Project method caches: This only needs to be updated if the default branch changes, but only if certain files change (e.g. README, CHANGELOG, etc.). Because the third item requires looking at the actual changes in the commit deltas, we schedule one `ProjectCacheWorker` to handle the first two cases, and schedule a separate `ProjectCacheWorker` for the third case if it is needed. As a result, this brings down the number of `ProjectCacheWorker` jobs from N to 2. Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/52046
-rw-r--r--app/models/repository.rb8
-rw-r--r--app/services/git/base_hooks_service.rb12
-rw-r--r--app/workers/post_receive.rb30
-rw-r--r--app/workers/project_cache_worker.rb6
-rw-r--r--changelogs/unreleased/sh-post-receive-cache-clear-once.yml5
-rw-r--r--lib/gitlab/git_post_receive.rb9
-rw-r--r--spec/lib/gitlab/git_post_receive_spec.rb32
-rw-r--r--spec/models/repository_spec.rb20
-rw-r--r--spec/services/git/branch_hooks_service_spec.rb10
-rw-r--r--spec/workers/post_receive_spec.rb48
-rw-r--r--spec/workers/project_cache_worker_spec.rb10
11 files changed, 163 insertions, 27 deletions
diff --git a/app/models/repository.rb b/app/models/repository.rb
index 9d45a12fa6e..6f63cd32da4 100644
--- a/app/models/repository.rb
+++ b/app/models/repository.rb
@@ -389,11 +389,15 @@ class Repository
expire_statistics_caches
end
- # Runs code after a repository has been created.
- def after_create
+ def expire_status_cache
expire_exists_cache
expire_root_ref_cache
expire_emptiness_caches
+ end
+
+ # Runs code after a repository has been created.
+ def after_create
+ expire_status_cache
repository_event(:create_repository)
end
diff --git a/app/services/git/base_hooks_service.rb b/app/services/git/base_hooks_service.rb
index 1db18fcf401..3fd38444196 100644
--- a/app/services/git/base_hooks_service.rb
+++ b/app/services/git/base_hooks_service.rb
@@ -8,8 +8,6 @@ module Git
PROCESS_COMMIT_LIMIT = 100
def execute
- project.repository.after_create if project.empty_repo?
-
create_events
create_pipelines
execute_project_hooks
@@ -70,11 +68,11 @@ module Git
end
def enqueue_invalidate_cache
- ProjectCacheWorker.perform_async(
- project.id,
- invalidated_file_types,
- [:commit_count, :repository_size]
- )
+ file_types = invalidated_file_types
+
+ return unless file_types.present?
+
+ ProjectCacheWorker.perform_async(project.id, file_types, [], false)
end
def base_params
diff --git a/app/workers/post_receive.rb b/app/workers/post_receive.rb
index 622bd6f1f48..61d34981458 100644
--- a/app/workers/post_receive.rb
+++ b/app/workers/post_receive.rb
@@ -42,10 +42,8 @@ class PostReceive
user = identify_user(post_received)
return false unless user
- # Expire the branches cache so we have updated data for this push
- post_received.project.repository.expire_branches_cache if post_received.includes_branches?
- # We only need to expire tags once per push
- post_received.project.repository.expire_caches_for_tags if post_received.includes_tags?
+ # We only need to expire certain caches once per push
+ expire_caches(post_received)
post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index|
service_klass =
@@ -74,6 +72,30 @@ class PostReceive
after_project_changes_hooks(post_received, user, refs.to_a, changes)
end
+ # Expire the project, branch, and tag cache once per push. Schedule an
+ # update for the repository size and commit count if necessary.
+ def expire_caches(post_received)
+ project = post_received.project
+
+ project.repository.expire_status_cache if project.empty_repo?
+ project.repository.expire_branches_cache if post_received.includes_branches?
+ project.repository.expire_caches_for_tags if post_received.includes_tags?
+
+ enqueue_repository_cache_update(post_received)
+ end
+
+ def enqueue_repository_cache_update(post_received)
+ stats_to_invalidate = [:repository_size]
+ stats_to_invalidate << :commit_count if post_received.includes_default_branch?
+
+ ProjectCacheWorker.perform_async(
+ post_received.project.id,
+ [],
+ stats_to_invalidate,
+ true
+ )
+ end
+
def after_project_changes_hooks(post_received, user, refs, changes)
hook_data = Gitlab::DataBuilder::Repository.update(post_received.project, user, changes, refs)
SystemHooksService.new.execute_hooks(hook_data, :repository_update_hooks)
diff --git a/app/workers/project_cache_worker.rb b/app/workers/project_cache_worker.rb
index 4e8ea903139..5ac860c93e0 100644
--- a/app/workers/project_cache_worker.rb
+++ b/app/workers/project_cache_worker.rb
@@ -12,13 +12,15 @@ class ProjectCacheWorker
# CHANGELOG.
# statistics - An Array containing columns from ProjectStatistics to
# refresh, if empty all columns will be refreshed
+ # refresh_statistics - A boolean that determines whether project statistics should
+ # be updated.
# rubocop: disable CodeReuse/ActiveRecord
- def perform(project_id, files = [], statistics = [])
+ def perform(project_id, files = [], statistics = [], refresh_statistics = true)
project = Project.find_by(id: project_id)
return unless project
- update_statistics(project, statistics)
+ update_statistics(project, statistics) if refresh_statistics
return unless project.repository.exists?
diff --git a/changelogs/unreleased/sh-post-receive-cache-clear-once.yml b/changelogs/unreleased/sh-post-receive-cache-clear-once.yml
new file mode 100644
index 00000000000..b677adf78d9
--- /dev/null
+++ b/changelogs/unreleased/sh-post-receive-cache-clear-once.yml
@@ -0,0 +1,5 @@
+---
+title: Expire project caches once per push instead of once per ref
+merge_request: 31876
+author:
+type: performance
diff --git a/lib/gitlab/git_post_receive.rb b/lib/gitlab/git_post_receive.rb
index 24d752b8a4b..140cb6c455d 100644
--- a/lib/gitlab/git_post_receive.rb
+++ b/lib/gitlab/git_post_receive.rb
@@ -39,6 +39,15 @@ module Gitlab
end
end
+ def includes_default_branch?
+ return false unless project.default_branch.present?
+
+ enum_for(:changes_refs).any? do |_oldrev, _newrev, ref|
+ Gitlab::Git.branch_ref?(ref) &&
+ Gitlab::Git.branch_name(ref) == project.default_branch
+ end
+ end
+
private
def deserialize_changes(changes)
diff --git a/spec/lib/gitlab/git_post_receive_spec.rb b/spec/lib/gitlab/git_post_receive_spec.rb
index 4c20d945585..3bb2dc09734 100644
--- a/spec/lib/gitlab/git_post_receive_spec.rb
+++ b/spec/lib/gitlab/git_post_receive_spec.rb
@@ -3,7 +3,7 @@
require 'spec_helper'
describe ::Gitlab::GitPostReceive do
- let(:project) { create(:project) }
+ set(:project) { create(:project, :repository) }
subject { described_class.new(project, "project-#{project.id}", changes.dup, {}) }
@@ -92,4 +92,34 @@ describe ::Gitlab::GitPostReceive do
end
end
end
+
+ describe '#includes_default_branch?' do
+ context 'with no default branch' do
+ let(:changes) do
+ <<~EOF
+ 654321 210987 refs/heads/test1
+ 654322 210986 refs/tags/#{project.default_branch}
+ 654323 210985 refs/heads/test3
+ EOF
+ end
+
+ it 'returns false' do
+ expect(subject.includes_default_branch?).to be_falsey
+ end
+ end
+
+ context 'with default branch' do
+ let(:changes) do
+ <<~EOF
+ 654322 210986 refs/heads/test1
+ 654321 210987 refs/tags/test2
+ 654323 210985 refs/heads/#{project.default_branch}
+ EOF
+ end
+
+ it 'returns true' do
+ expect(subject.includes_default_branch?).to be_truthy
+ end
+ end
+ end
end
diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb
index e68de2e73a8..419e1dc2459 100644
--- a/spec/models/repository_spec.rb
+++ b/spec/models/repository_spec.rb
@@ -1815,22 +1815,36 @@ describe Repository do
end
describe '#after_create' do
+ it 'calls expire_status_cache' do
+ expect(repository).to receive(:expire_status_cache)
+
+ repository.after_create
+ end
+
+ it 'logs an event' do
+ expect(repository).to receive(:repository_event).with(:create_repository)
+
+ repository.after_create
+ end
+ end
+
+ describe '#expire_status_cache' do
it 'flushes the exists cache' do
expect(repository).to receive(:expire_exists_cache)
- repository.after_create
+ repository.expire_status_cache
end
it 'flushes the root ref cache' do
expect(repository).to receive(:expire_root_ref_cache)
- repository.after_create
+ repository.expire_status_cache
end
it 'flushes the emptiness caches' do
expect(repository).to receive(:expire_emptiness_caches)
- repository.after_create
+ repository.expire_status_cache
end
end
diff --git a/spec/services/git/branch_hooks_service_spec.rb b/spec/services/git/branch_hooks_service_spec.rb
index 8af51848b7b..3929f51a0e2 100644
--- a/spec/services/git/branch_hooks_service_spec.rb
+++ b/spec/services/git/branch_hooks_service_spec.rb
@@ -158,9 +158,13 @@ describe Git::BranchHooksService do
let(:blank_sha) { Gitlab::Git::BLANK_SHA }
def clears_cache(extended: [])
- expect(ProjectCacheWorker)
- .to receive(:perform_async)
- .with(project.id, extended, %i[commit_count repository_size])
+ expect(service).to receive(:invalidated_file_types).and_return(extended)
+
+ if extended.present?
+ expect(ProjectCacheWorker)
+ .to receive(:perform_async)
+ .with(project.id, extended, [], false)
+ end
service.execute
end
diff --git a/spec/workers/post_receive_spec.rb b/spec/workers/post_receive_spec.rb
index 3b69b81f12e..937c793f674 100644
--- a/spec/workers/post_receive_spec.rb
+++ b/spec/workers/post_receive_spec.rb
@@ -67,15 +67,22 @@ describe PostReceive do
context "branches" do
let(:changes) do
<<~EOF
- '123456 789012 refs/heads/tést1'
- '123456 789012 refs/heads/tést2'
+ 123456 789012 refs/heads/tést1
+ 123456 789012 refs/heads/tést2
EOF
end
it 'expires the branches cache' do
expect(project.repository).to receive(:expire_branches_cache).once
- described_class.new.perform(gl_repository, key_id, base64_changes)
+ perform
+ end
+
+ it 'expires the status cache' do
+ expect(project).to receive(:empty_repo?).and_return(true)
+ expect(project.repository).to receive(:expire_status_cache)
+
+ perform
end
it 'calls Git::BranchPushService' do
@@ -87,6 +94,30 @@ describe PostReceive do
perform
end
+
+ it 'schedules a cache update for repository size only' do
+ expect(ProjectCacheWorker).to receive(:perform_async)
+ .with(project.id, [], [:repository_size], true)
+
+ perform
+ end
+
+ context 'with a default branch' do
+ let(:changes) do
+ <<~EOF
+ 123456 789012 refs/heads/tést1
+ 123456 789012 refs/heads/tést2
+ 678912 123455 refs/heads/#{project.default_branch}
+ EOF
+ end
+
+ it 'schedules a cache update for commit count and size' do
+ expect(ProjectCacheWorker).to receive(:perform_async)
+ .with(project.id, [], [:repository_size, :commit_count], true)
+
+ perform
+ end
+ end
end
context "tags" do
@@ -107,7 +138,7 @@ describe PostReceive do
it 'does not expire branches cache' do
expect(project.repository).not_to receive(:expire_branches_cache)
- described_class.new.perform(gl_repository, key_id, base64_changes)
+ perform
end
it "only invalidates tags once" do
@@ -115,7 +146,7 @@ describe PostReceive do
expect(project.repository).to receive(:expire_caches_for_tags).once.and_call_original
expect(project.repository).to receive(:expire_tags_cache).once.and_call_original
- described_class.new.perform(gl_repository, key_id, base64_changes)
+ perform
end
it "calls Git::TagPushService" do
@@ -129,6 +160,13 @@ describe PostReceive do
perform
end
+
+ it 'schedules a single ProjectCacheWorker update' do
+ expect(ProjectCacheWorker).to receive(:perform_async)
+ .with(project.id, [], [:repository_size], true)
+
+ perform
+ end
end
context "merge-requests" do
diff --git a/spec/workers/project_cache_worker_spec.rb b/spec/workers/project_cache_worker_spec.rb
index edc55920b8e..7f3c4881b89 100644
--- a/spec/workers/project_cache_worker_spec.rb
+++ b/spec/workers/project_cache_worker_spec.rb
@@ -49,6 +49,16 @@ describe ProjectCacheWorker do
worker.perform(project.id, %w(readme))
end
+ context 'with statistics disabled' do
+ let(:statistics) { [] }
+
+ it 'does not update the project statistics' do
+ expect(worker).not_to receive(:update_statistics)
+
+ worker.perform(project.id, [], [], false)
+ end
+ end
+
context 'with statistics' do
let(:statistics) { %w(repository_size) }