summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Thomas <nick@gitlab.com>2019-07-08 17:11:59 +0100
committerNick Thomas <nick@gitlab.com>2019-08-16 15:41:29 +0100
commit0eff75fa2b6691b6fba31fcc2842f51debd249a9 (patch)
tree8c7885f136c617621948a5c24654f94dfb9c264f
parent648e5a942d4a8c66631354e05aa3f59206576dea (diff)
downloadgitlab-ce-0eff75fa2b6691b6fba31fcc2842f51debd249a9.tar.gz
Cache branch and tag names as Redis sets
This allows us to check inclusion for the *_exists? methods without downloading the full list of branch names, which is over 100KiB in size for gitlab-ce at the moment.
-rw-r--r--app/models/repository.rb12
-rw-r--r--changelogs/unreleased/64251-branch-name-set-cache.yml5
-rw-r--r--lib/gitlab/repository_cache_adapter.rb53
-rw-r--r--lib/gitlab/repository_set_cache.rb67
-rw-r--r--spec/lib/gitlab/repository_cache_adapter_spec.rb7
-rw-r--r--spec/lib/gitlab/repository_set_cache_spec.rb73
-rw-r--r--spec/models/repository_spec.rb52
7 files changed, 252 insertions, 17 deletions
diff --git a/app/models/repository.rb b/app/models/repository.rb
index 9d45a12fa6e..561c6579086 100644
--- a/app/models/repository.rb
+++ b/app/models/repository.rb
@@ -239,13 +239,13 @@ class Repository
def branch_exists?(branch_name)
return false unless raw_repository
- branch_names.include?(branch_name)
+ branch_names_include?(branch_name)
end
def tag_exists?(tag_name)
return false unless raw_repository
- tag_names.include?(tag_name)
+ tag_names_include?(tag_name)
end
def ref_exists?(ref)
@@ -561,10 +561,10 @@ class Repository
end
delegate :branch_names, to: :raw_repository
- cache_method :branch_names, fallback: []
+ cache_method_as_redis_set :branch_names, fallback: []
delegate :tag_names, to: :raw_repository
- cache_method :tag_names, fallback: []
+ cache_method_as_redis_set :tag_names, fallback: []
delegate :branch_count, :tag_count, :has_visible_content?, to: :raw_repository
cache_method :branch_count, fallback: 0
@@ -1126,6 +1126,10 @@ class Repository
@cache ||= Gitlab::RepositoryCache.new(self)
end
+ def redis_set_cache
+ @redis_set_cache ||= Gitlab::RepositorySetCache.new(self)
+ end
+
def request_store_cache
@request_store_cache ||= Gitlab::RepositoryCache.new(self, backend: Gitlab::SafeRequestStore)
end
diff --git a/changelogs/unreleased/64251-branch-name-set-cache.yml b/changelogs/unreleased/64251-branch-name-set-cache.yml
new file mode 100644
index 00000000000..6ce4bdf5e43
--- /dev/null
+++ b/changelogs/unreleased/64251-branch-name-set-cache.yml
@@ -0,0 +1,5 @@
+---
+title: Cache branch and tag names as Redis sets
+merge_request: 30476
+author:
+type: performance
diff --git a/lib/gitlab/repository_cache_adapter.rb b/lib/gitlab/repository_cache_adapter.rb
index e40c366ed02..75503ee1789 100644
--- a/lib/gitlab/repository_cache_adapter.rb
+++ b/lib/gitlab/repository_cache_adapter.rb
@@ -23,6 +23,37 @@ module Gitlab
end
end
+ # Caches and strongly memoizes the method as a Redis Set.
+ #
+ # This only works for methods that do not take any arguments. The method
+ # should return an Array of Strings to be cached.
+ #
+ # In addition to overriding the named method, a "name_include?" method is
+ # defined. This uses the "SISMEMBER" query to efficiently check membership
+ # without needing to load the entire set into memory.
+ #
+ # name - The name of the method to be cached.
+ # fallback - A value to fall back to if the repository does not exist, or
+ # in case of a Git error. Defaults to nil.
+ def cache_method_as_redis_set(name, fallback: nil)
+ uncached_name = alias_uncached_method(name)
+
+ define_method(name) do
+ cache_method_output_as_redis_set(name, fallback: fallback) do
+ __send__(uncached_name) # rubocop:disable GitlabSecurity/PublicSend
+ end
+ end
+
+ define_method("#{name}_include?") do |value|
+ # If the cache isn't populated, we can't rely on it
+ return redis_set_cache.include?(name, value) if redis_set_cache.exist?(name)
+
+ # Since we have to pull all branch names to populate the cache, use
+ # the data we already have to answer the query just this once
+ __send__(name).include?(value) # rubocop:disable GitlabSecurity/PublicSend
+ end
+ end
+
# Caches truthy values from the method. All values are strongly memoized,
# and cached in RequestStore.
#
@@ -84,6 +115,11 @@ module Gitlab
raise NotImplementedError
end
+ # RepositorySetCache to be used. Should be overridden by the including class
+ def redis_set_cache
+ raise NotImplementedError
+ end
+
# List of cached methods. Should be overridden by the including class
def cached_methods
raise NotImplementedError
@@ -100,6 +136,18 @@ module Gitlab
end
end
+ # Caches and strongly memoizes the supplied block as a Redis Set. The result
+ # will be provided as a sorted array.
+ #
+ # name - The name of the method to be cached.
+ # fallback - A value to fall back to if the repository does not exist, or
+ # in case of a Git error. Defaults to nil.
+ def cache_method_output_as_redis_set(name, fallback: nil, &block)
+ memoize_method_output(name, fallback: fallback) do
+ redis_set_cache.fetch(name, &block).sort
+ end
+ end
+
# Caches truthy values from the supplied block. All values are strongly
# memoized, and cached in RequestStore.
#
@@ -154,6 +202,7 @@ module Gitlab
clear_memoization(memoizable_name(name))
end
+ expire_redis_set_method_caches(methods)
expire_request_store_method_caches(methods)
end
@@ -169,6 +218,10 @@ module Gitlab
end
end
+ def expire_redis_set_method_caches(methods)
+ methods.each { |name| redis_set_cache.expire(name) }
+ end
+
# All cached repository methods depend on the existence of a Git repository,
# so if the repository doesn't exist, we already know not to call it.
def fallback_early?(method_name)
diff --git a/lib/gitlab/repository_set_cache.rb b/lib/gitlab/repository_set_cache.rb
new file mode 100644
index 00000000000..fb634328a95
--- /dev/null
+++ b/lib/gitlab/repository_set_cache.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+# Interface to the Redis-backed cache store for keys that use a Redis set
+module Gitlab
+ class RepositorySetCache
+ attr_reader :repository, :namespace, :expires_in
+
+ def initialize(repository, extra_namespace: nil, expires_in: 2.weeks)
+ @repository = repository
+ @namespace = "#{repository.full_path}:#{repository.project.id}"
+ @namespace = "#{@namespace}:#{extra_namespace}" if extra_namespace
+ @expires_in = expires_in
+ end
+
+ def cache_key(type)
+ [type, namespace, 'set'].join(':')
+ end
+
+ def expire(key)
+ with { |redis| redis.del(cache_key(key)) }
+ end
+
+ def exist?(key)
+ with { |redis| redis.exists(cache_key(key)) }
+ end
+
+ def read(key)
+ with { |redis| redis.smembers(cache_key(key)) }
+ end
+
+ def write(key, value)
+ full_key = cache_key(key)
+
+ with do |redis|
+ redis.multi do
+ redis.del(full_key)
+
+ # Splitting into groups of 1000 prevents us from creating a too-long
+ # Redis command
+ value.in_groups_of(1000, false) { |subset| redis.sadd(full_key, subset) }
+
+ redis.expire(full_key, expires_in)
+ end
+ end
+
+ value
+ end
+
+ def fetch(key, &block)
+ if exist?(key)
+ read(key)
+ else
+ write(key, yield)
+ end
+ end
+
+ def include?(key, value)
+ with { |redis| redis.sismember(cache_key(key), value) }
+ end
+
+ private
+
+ def with(&blk)
+ Gitlab::Redis::Cache.with(&blk) # rubocop:disable CodeReuse/ActiveRecord
+ end
+ end
+end
diff --git a/spec/lib/gitlab/repository_cache_adapter_spec.rb b/spec/lib/gitlab/repository_cache_adapter_spec.rb
index 0295138fc3a..04fc24b6205 100644
--- a/spec/lib/gitlab/repository_cache_adapter_spec.rb
+++ b/spec/lib/gitlab/repository_cache_adapter_spec.rb
@@ -4,6 +4,7 @@ describe Gitlab::RepositoryCacheAdapter do
let(:project) { create(:project, :repository) }
let(:repository) { project.repository }
let(:cache) { repository.send(:cache) }
+ let(:redis_set_cache) { repository.send(:redis_set_cache) }
describe '#cache_method_output', :use_clean_rails_memory_store_caching do
let(:fallback) { 10 }
@@ -206,9 +207,11 @@ describe Gitlab::RepositoryCacheAdapter do
describe '#expire_method_caches' do
it 'expires the caches of the given methods' do
expect(cache).to receive(:expire).with(:rendered_readme)
- expect(cache).to receive(:expire).with(:gitignore)
+ expect(cache).to receive(:expire).with(:branch_names)
+ expect(redis_set_cache).to receive(:expire).with(:rendered_readme)
+ expect(redis_set_cache).to receive(:expire).with(:branch_names)
- repository.expire_method_caches(%i(rendered_readme gitignore))
+ repository.expire_method_caches(%i(rendered_readme branch_names))
end
it 'does not expire caches for non-existent methods' do
diff --git a/spec/lib/gitlab/repository_set_cache_spec.rb b/spec/lib/gitlab/repository_set_cache_spec.rb
new file mode 100644
index 00000000000..9695e13d842
--- /dev/null
+++ b/spec/lib/gitlab/repository_set_cache_spec.rb
@@ -0,0 +1,73 @@
+require 'spec_helper'
+
+describe Gitlab::RepositorySetCache, :clean_gitlab_redis_cache do
+ let(:project) { create(:project) }
+ let(:repository) { project.repository }
+ let(:namespace) { "#{repository.full_path}:#{project.id}" }
+ let(:cache) { described_class.new(repository) }
+
+ describe '#cache_key' do
+ subject { cache.cache_key(:foo) }
+
+ it 'includes the namespace' do
+ is_expected.to eq("foo:#{namespace}:set")
+ end
+
+ context 'with a given namespace' do
+ let(:extra_namespace) { 'my:data' }
+ let(:cache) { described_class.new(repository, extra_namespace: extra_namespace) }
+
+ it 'includes the full namespace' do
+ is_expected.to eq("foo:#{namespace}:#{extra_namespace}:set")
+ end
+ end
+ end
+
+ describe '#expire' do
+ it 'expires the given key from the cache' do
+ cache.write(:foo, ['value'])
+
+ expect(cache.read(:foo)).to contain_exactly('value')
+ expect(cache.expire(:foo)).to eq(1)
+ expect(cache.read(:foo)).to be_empty
+ end
+ end
+
+ describe '#exist?' do
+ it 'checks whether the key exists' do
+ expect(cache.exist?(:foo)).to be(false)
+
+ cache.write(:foo, ['value'])
+
+ expect(cache.exist?(:foo)).to be(true)
+ end
+ end
+
+ describe '#fetch' do
+ let(:blk) { -> { ['block value'] } }
+
+ subject { cache.fetch(:foo, &blk) }
+
+ it 'fetches the key from the cache when filled' do
+ cache.write(:foo, ['value'])
+
+ is_expected.to contain_exactly('value')
+ end
+
+ it 'writes the value of the provided block when empty' do
+ cache.expire(:foo)
+
+ is_expected.to contain_exactly('block value')
+ expect(cache.read(:foo)).to contain_exactly('block value')
+ end
+ end
+
+ describe '#include?' do
+ it 'checks inclusion in the Redis set' do
+ cache.write(:foo, ['value'])
+
+ expect(cache.include?(:foo, 'value')).to be(true)
+ expect(cache.include?(:foo, 'bar')).to be(false)
+ end
+ end
+end
diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb
index e68de2e73a8..423334c66f2 100644
--- a/spec/models/repository_spec.rb
+++ b/spec/models/repository_spec.rb
@@ -1223,36 +1223,66 @@ describe Repository do
end
describe '#branch_exists?' do
- it 'uses branch_names' do
- allow(repository).to receive(:branch_names).and_return(['foobar'])
+ let(:branch) { repository.root_ref }
- expect(repository.branch_exists?('foobar')).to eq(true)
- expect(repository.branch_exists?('master')).to eq(false)
+ subject { repository.branch_exists?(branch) }
+
+ it 'delegates to branch_names when the cache is empty' do
+ repository.expire_branches_cache
+
+ expect(repository).to receive(:branch_names).and_call_original
+ is_expected.to eq(true)
+ end
+
+ it 'uses redis set caching when the cache is filled' do
+ repository.branch_names # ensure the branch name cache is filled
+
+ expect(repository)
+ .to receive(:branch_names_include?)
+ .with(branch)
+ .and_call_original
+
+ is_expected.to eq(true)
end
end
describe '#tag_exists?' do
- it 'uses tag_names' do
- allow(repository).to receive(:tag_names).and_return(['foobar'])
+ let(:tag) { repository.tags.first.name }
+
+ subject { repository.tag_exists?(tag) }
+
+ it 'delegates to tag_names when the cache is empty' do
+ repository.expire_tags_cache
+
+ expect(repository).to receive(:tag_names).and_call_original
+ is_expected.to eq(true)
+ end
+
+ it 'uses redis set caching when the cache is filled' do
+ repository.tag_names # ensure the tag name cache is filled
+
+ expect(repository)
+ .to receive(:tag_names_include?)
+ .with(tag)
+ .and_call_original
- expect(repository.tag_exists?('foobar')).to eq(true)
- expect(repository.tag_exists?('master')).to eq(false)
+ is_expected.to eq(true)
end
end
- describe '#branch_names', :use_clean_rails_memory_store_caching do
+ describe '#branch_names', :clean_gitlab_redis_cache do
let(:fake_branch_names) { ['foobar'] }
it 'gets cached across Repository instances' do
allow(repository.raw_repository).to receive(:branch_names).once.and_return(fake_branch_names)
- expect(repository.branch_names).to eq(fake_branch_names)
+ expect(repository.branch_names).to match_array(fake_branch_names)
fresh_repository = Project.find(project.id).repository
expect(fresh_repository.object_id).not_to eq(repository.object_id)
expect(fresh_repository.raw_repository).not_to receive(:branch_names)
- expect(fresh_repository.branch_names).to eq(fake_branch_names)
+ expect(fresh_repository.branch_names).to match_array(fake_branch_names)
end
end