From 0eff75fa2b6691b6fba31fcc2842f51debd249a9 Mon Sep 17 00:00:00 2001 From: Nick Thomas Date: Mon, 8 Jul 2019 17:11:59 +0100 Subject: Cache branch and tag names as Redis sets This allows us to check inclusion for the *_exists? methods without downloading the full list of branch names, which is over 100KiB in size for gitlab-ce at the moment. --- app/models/repository.rb | 12 ++-- .../unreleased/64251-branch-name-set-cache.yml | 5 ++ lib/gitlab/repository_cache_adapter.rb | 53 ++++++++++++++++ lib/gitlab/repository_set_cache.rb | 67 ++++++++++++++++++++ spec/lib/gitlab/repository_cache_adapter_spec.rb | 7 ++- spec/lib/gitlab/repository_set_cache_spec.rb | 73 ++++++++++++++++++++++ spec/models/repository_spec.rb | 52 +++++++++++---- 7 files changed, 252 insertions(+), 17 deletions(-) create mode 100644 changelogs/unreleased/64251-branch-name-set-cache.yml create mode 100644 lib/gitlab/repository_set_cache.rb create mode 100644 spec/lib/gitlab/repository_set_cache_spec.rb diff --git a/app/models/repository.rb b/app/models/repository.rb index 9d45a12fa6e..561c6579086 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -239,13 +239,13 @@ class Repository def branch_exists?(branch_name) return false unless raw_repository - branch_names.include?(branch_name) + branch_names_include?(branch_name) end def tag_exists?(tag_name) return false unless raw_repository - tag_names.include?(tag_name) + tag_names_include?(tag_name) end def ref_exists?(ref) @@ -561,10 +561,10 @@ class Repository end delegate :branch_names, to: :raw_repository - cache_method :branch_names, fallback: [] + cache_method_as_redis_set :branch_names, fallback: [] delegate :tag_names, to: :raw_repository - cache_method :tag_names, fallback: [] + cache_method_as_redis_set :tag_names, fallback: [] delegate :branch_count, :tag_count, :has_visible_content?, to: :raw_repository cache_method :branch_count, fallback: 0 @@ -1126,6 +1126,10 @@ class Repository @cache ||= Gitlab::RepositoryCache.new(self) end + def redis_set_cache + @redis_set_cache ||= Gitlab::RepositorySetCache.new(self) + end + def request_store_cache @request_store_cache ||= Gitlab::RepositoryCache.new(self, backend: Gitlab::SafeRequestStore) end diff --git a/changelogs/unreleased/64251-branch-name-set-cache.yml b/changelogs/unreleased/64251-branch-name-set-cache.yml new file mode 100644 index 00000000000..6ce4bdf5e43 --- /dev/null +++ b/changelogs/unreleased/64251-branch-name-set-cache.yml @@ -0,0 +1,5 @@ +--- +title: Cache branch and tag names as Redis sets +merge_request: 30476 +author: +type: performance diff --git a/lib/gitlab/repository_cache_adapter.rb b/lib/gitlab/repository_cache_adapter.rb index e40c366ed02..75503ee1789 100644 --- a/lib/gitlab/repository_cache_adapter.rb +++ b/lib/gitlab/repository_cache_adapter.rb @@ -23,6 +23,37 @@ module Gitlab end end + # Caches and strongly memoizes the method as a Redis Set. + # + # This only works for methods that do not take any arguments. The method + # should return an Array of Strings to be cached. + # + # In addition to overriding the named method, a "name_include?" method is + # defined. This uses the "SISMEMBER" query to efficiently check membership + # without needing to load the entire set into memory. + # + # name - The name of the method to be cached. + # fallback - A value to fall back to if the repository does not exist, or + # in case of a Git error. Defaults to nil. + def cache_method_as_redis_set(name, fallback: nil) + uncached_name = alias_uncached_method(name) + + define_method(name) do + cache_method_output_as_redis_set(name, fallback: fallback) do + __send__(uncached_name) # rubocop:disable GitlabSecurity/PublicSend + end + end + + define_method("#{name}_include?") do |value| + # If the cache isn't populated, we can't rely on it + return redis_set_cache.include?(name, value) if redis_set_cache.exist?(name) + + # Since we have to pull all branch names to populate the cache, use + # the data we already have to answer the query just this once + __send__(name).include?(value) # rubocop:disable GitlabSecurity/PublicSend + end + end + # Caches truthy values from the method. All values are strongly memoized, # and cached in RequestStore. # @@ -84,6 +115,11 @@ module Gitlab raise NotImplementedError end + # RepositorySetCache to be used. Should be overridden by the including class + def redis_set_cache + raise NotImplementedError + end + # List of cached methods. Should be overridden by the including class def cached_methods raise NotImplementedError @@ -100,6 +136,18 @@ module Gitlab end end + # Caches and strongly memoizes the supplied block as a Redis Set. The result + # will be provided as a sorted array. + # + # name - The name of the method to be cached. + # fallback - A value to fall back to if the repository does not exist, or + # in case of a Git error. Defaults to nil. + def cache_method_output_as_redis_set(name, fallback: nil, &block) + memoize_method_output(name, fallback: fallback) do + redis_set_cache.fetch(name, &block).sort + end + end + # Caches truthy values from the supplied block. All values are strongly # memoized, and cached in RequestStore. # @@ -154,6 +202,7 @@ module Gitlab clear_memoization(memoizable_name(name)) end + expire_redis_set_method_caches(methods) expire_request_store_method_caches(methods) end @@ -169,6 +218,10 @@ module Gitlab end end + def expire_redis_set_method_caches(methods) + methods.each { |name| redis_set_cache.expire(name) } + end + # All cached repository methods depend on the existence of a Git repository, # so if the repository doesn't exist, we already know not to call it. def fallback_early?(method_name) diff --git a/lib/gitlab/repository_set_cache.rb b/lib/gitlab/repository_set_cache.rb new file mode 100644 index 00000000000..fb634328a95 --- /dev/null +++ b/lib/gitlab/repository_set_cache.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +# Interface to the Redis-backed cache store for keys that use a Redis set +module Gitlab + class RepositorySetCache + attr_reader :repository, :namespace, :expires_in + + def initialize(repository, extra_namespace: nil, expires_in: 2.weeks) + @repository = repository + @namespace = "#{repository.full_path}:#{repository.project.id}" + @namespace = "#{@namespace}:#{extra_namespace}" if extra_namespace + @expires_in = expires_in + end + + def cache_key(type) + [type, namespace, 'set'].join(':') + end + + def expire(key) + with { |redis| redis.del(cache_key(key)) } + end + + def exist?(key) + with { |redis| redis.exists(cache_key(key)) } + end + + def read(key) + with { |redis| redis.smembers(cache_key(key)) } + end + + def write(key, value) + full_key = cache_key(key) + + with do |redis| + redis.multi do + redis.del(full_key) + + # Splitting into groups of 1000 prevents us from creating a too-long + # Redis command + value.in_groups_of(1000, false) { |subset| redis.sadd(full_key, subset) } + + redis.expire(full_key, expires_in) + end + end + + value + end + + def fetch(key, &block) + if exist?(key) + read(key) + else + write(key, yield) + end + end + + def include?(key, value) + with { |redis| redis.sismember(cache_key(key), value) } + end + + private + + def with(&blk) + Gitlab::Redis::Cache.with(&blk) # rubocop:disable CodeReuse/ActiveRecord + end + end +end diff --git a/spec/lib/gitlab/repository_cache_adapter_spec.rb b/spec/lib/gitlab/repository_cache_adapter_spec.rb index 0295138fc3a..04fc24b6205 100644 --- a/spec/lib/gitlab/repository_cache_adapter_spec.rb +++ b/spec/lib/gitlab/repository_cache_adapter_spec.rb @@ -4,6 +4,7 @@ describe Gitlab::RepositoryCacheAdapter do let(:project) { create(:project, :repository) } let(:repository) { project.repository } let(:cache) { repository.send(:cache) } + let(:redis_set_cache) { repository.send(:redis_set_cache) } describe '#cache_method_output', :use_clean_rails_memory_store_caching do let(:fallback) { 10 } @@ -206,9 +207,11 @@ describe Gitlab::RepositoryCacheAdapter do describe '#expire_method_caches' do it 'expires the caches of the given methods' do expect(cache).to receive(:expire).with(:rendered_readme) - expect(cache).to receive(:expire).with(:gitignore) + expect(cache).to receive(:expire).with(:branch_names) + expect(redis_set_cache).to receive(:expire).with(:rendered_readme) + expect(redis_set_cache).to receive(:expire).with(:branch_names) - repository.expire_method_caches(%i(rendered_readme gitignore)) + repository.expire_method_caches(%i(rendered_readme branch_names)) end it 'does not expire caches for non-existent methods' do diff --git a/spec/lib/gitlab/repository_set_cache_spec.rb b/spec/lib/gitlab/repository_set_cache_spec.rb new file mode 100644 index 00000000000..9695e13d842 --- /dev/null +++ b/spec/lib/gitlab/repository_set_cache_spec.rb @@ -0,0 +1,73 @@ +require 'spec_helper' + +describe Gitlab::RepositorySetCache, :clean_gitlab_redis_cache do + let(:project) { create(:project) } + let(:repository) { project.repository } + let(:namespace) { "#{repository.full_path}:#{project.id}" } + let(:cache) { described_class.new(repository) } + + describe '#cache_key' do + subject { cache.cache_key(:foo) } + + it 'includes the namespace' do + is_expected.to eq("foo:#{namespace}:set") + end + + context 'with a given namespace' do + let(:extra_namespace) { 'my:data' } + let(:cache) { described_class.new(repository, extra_namespace: extra_namespace) } + + it 'includes the full namespace' do + is_expected.to eq("foo:#{namespace}:#{extra_namespace}:set") + end + end + end + + describe '#expire' do + it 'expires the given key from the cache' do + cache.write(:foo, ['value']) + + expect(cache.read(:foo)).to contain_exactly('value') + expect(cache.expire(:foo)).to eq(1) + expect(cache.read(:foo)).to be_empty + end + end + + describe '#exist?' do + it 'checks whether the key exists' do + expect(cache.exist?(:foo)).to be(false) + + cache.write(:foo, ['value']) + + expect(cache.exist?(:foo)).to be(true) + end + end + + describe '#fetch' do + let(:blk) { -> { ['block value'] } } + + subject { cache.fetch(:foo, &blk) } + + it 'fetches the key from the cache when filled' do + cache.write(:foo, ['value']) + + is_expected.to contain_exactly('value') + end + + it 'writes the value of the provided block when empty' do + cache.expire(:foo) + + is_expected.to contain_exactly('block value') + expect(cache.read(:foo)).to contain_exactly('block value') + end + end + + describe '#include?' do + it 'checks inclusion in the Redis set' do + cache.write(:foo, ['value']) + + expect(cache.include?(:foo, 'value')).to be(true) + expect(cache.include?(:foo, 'bar')).to be(false) + end + end +end diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb index e68de2e73a8..423334c66f2 100644 --- a/spec/models/repository_spec.rb +++ b/spec/models/repository_spec.rb @@ -1223,36 +1223,66 @@ describe Repository do end describe '#branch_exists?' do - it 'uses branch_names' do - allow(repository).to receive(:branch_names).and_return(['foobar']) + let(:branch) { repository.root_ref } - expect(repository.branch_exists?('foobar')).to eq(true) - expect(repository.branch_exists?('master')).to eq(false) + subject { repository.branch_exists?(branch) } + + it 'delegates to branch_names when the cache is empty' do + repository.expire_branches_cache + + expect(repository).to receive(:branch_names).and_call_original + is_expected.to eq(true) + end + + it 'uses redis set caching when the cache is filled' do + repository.branch_names # ensure the branch name cache is filled + + expect(repository) + .to receive(:branch_names_include?) + .with(branch) + .and_call_original + + is_expected.to eq(true) end end describe '#tag_exists?' do - it 'uses tag_names' do - allow(repository).to receive(:tag_names).and_return(['foobar']) + let(:tag) { repository.tags.first.name } + + subject { repository.tag_exists?(tag) } + + it 'delegates to tag_names when the cache is empty' do + repository.expire_tags_cache + + expect(repository).to receive(:tag_names).and_call_original + is_expected.to eq(true) + end + + it 'uses redis set caching when the cache is filled' do + repository.tag_names # ensure the tag name cache is filled + + expect(repository) + .to receive(:tag_names_include?) + .with(tag) + .and_call_original - expect(repository.tag_exists?('foobar')).to eq(true) - expect(repository.tag_exists?('master')).to eq(false) + is_expected.to eq(true) end end - describe '#branch_names', :use_clean_rails_memory_store_caching do + describe '#branch_names', :clean_gitlab_redis_cache do let(:fake_branch_names) { ['foobar'] } it 'gets cached across Repository instances' do allow(repository.raw_repository).to receive(:branch_names).once.and_return(fake_branch_names) - expect(repository.branch_names).to eq(fake_branch_names) + expect(repository.branch_names).to match_array(fake_branch_names) fresh_repository = Project.find(project.id).repository expect(fresh_repository.object_id).not_to eq(repository.object_id) expect(fresh_repository.raw_repository).not_to receive(:branch_names) - expect(fresh_repository.branch_names).to eq(fake_branch_names) + expect(fresh_repository.branch_names).to match_array(fake_branch_names) end end -- cgit v1.2.1