From dabc703a2958aeb6cdd73687b0fda51f2992cc19 Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Thu, 1 Feb 2018 20:56:41 +0100 Subject: Migrate Git::Blob.batch to Gitaly Closes gitaly#985 --- Gemfile | 2 +- Gemfile.lock | 4 +- lib/gitlab/git/blob.rb | 6 +- lib/gitlab/gitaly_client/blob_service.rb | 26 ++++++ lib/gitlab/gitaly_client/blobs_stitcher.rb | 47 +++++++++++ spec/lib/gitlab/git/blob_spec.rb | 94 ++++++++++++---------- .../gitlab/gitaly_client/blobs_stitcher_spec.rb | 36 +++++++++ 7 files changed, 165 insertions(+), 50 deletions(-) create mode 100644 lib/gitlab/gitaly_client/blobs_stitcher.rb create mode 100644 spec/lib/gitlab/gitaly_client/blobs_stitcher_spec.rb diff --git a/Gemfile b/Gemfile index cb381aa12c8..6395a25a346 100644 --- a/Gemfile +++ b/Gemfile @@ -410,7 +410,7 @@ group :ed25519 do end # Gitaly GRPC client -gem 'gitaly-proto', '~> 0.83.0', require: 'gitaly' +gem 'gitaly-proto', '~> 0.84.0', require: 'gitaly' # Locked until https://github.com/google/protobuf/issues/4210 is closed gem 'google-protobuf', '= 3.5.1' diff --git a/Gemfile.lock b/Gemfile.lock index d9665e38f45..9847c71352a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -285,7 +285,7 @@ GEM po_to_json (>= 1.0.0) rails (>= 3.2.0) gherkin-ruby (0.3.2) - gitaly-proto (0.83.0) + gitaly-proto (0.84.0) google-protobuf (~> 3.1) grpc (~> 1.0) github-linguist (4.7.6) @@ -1056,7 +1056,7 @@ DEPENDENCIES gettext (~> 3.2.2) gettext_i18n_rails (~> 1.8.0) gettext_i18n_rails_js (~> 1.2.0) - gitaly-proto (~> 0.83.0) + gitaly-proto (~> 0.84.0) github-linguist (~> 4.7.0) gitlab-flowdock-git-hook (~> 1.0.1) gitlab-markup (~> 1.6.2) diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb index 4828301dbb9..b2fca2c16de 100644 --- a/lib/gitlab/git/blob.rb +++ b/lib/gitlab/git/blob.rb @@ -53,11 +53,7 @@ module Gitlab def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE) Gitlab::GitalyClient.migrate(:list_blobs_by_sha_path) do |is_enabled| if is_enabled - Gitlab::GitalyClient.allow_n_plus_1_calls do - blob_references.map do |sha, path| - find_by_gitaly(repository, sha, path, limit: blob_size_limit) - end - end + repository.gitaly_blob_client.get_blobs(blob_references, blob_size_limit).to_a else blob_references.map do |sha, path| find_by_rugged(repository, sha, path, limit: blob_size_limit) diff --git a/lib/gitlab/gitaly_client/blob_service.rb b/lib/gitlab/gitaly_client/blob_service.rb index d70a1a7665e..dfa0fa43b0f 100644 --- a/lib/gitlab/gitaly_client/blob_service.rb +++ b/lib/gitlab/gitaly_client/blob_service.rb @@ -1,6 +1,8 @@ module Gitlab module GitalyClient class BlobService + include Gitlab::EncodingHelper + def initialize(repository) @gitaly_repo = repository.gitaly_repository end @@ -54,6 +56,30 @@ module Gitlab end end end + + def get_blobs(revision_paths, limit = -1) + return [] if revision_paths.empty? + + revision_paths.map! do |rev, path| + Gitaly::GetBlobsRequest::RevisionPath.new(revision: rev, path: encode_binary(path)) + end + + request = Gitaly::GetBlobsRequest.new( + repository: @gitaly_repo, + revision_paths: revision_paths, + limit: limit + ) + + response = GitalyClient.call( + @gitaly_repo.storage_name, + :blob_service, + :get_blobs, + request, + timeout: GitalyClient.default_timeout + ) + + GitalyClient::BlobsStitcher.new(response) + end end end end diff --git a/lib/gitlab/gitaly_client/blobs_stitcher.rb b/lib/gitlab/gitaly_client/blobs_stitcher.rb new file mode 100644 index 00000000000..5ca592ff812 --- /dev/null +++ b/lib/gitlab/gitaly_client/blobs_stitcher.rb @@ -0,0 +1,47 @@ +module Gitlab + module GitalyClient + class BlobsStitcher + include Enumerable + + def initialize(rpc_response) + @rpc_response = rpc_response + end + + def each + current_blob_data = nil + + @rpc_response.each do |msg| + begin + if msg.oid.blank? && msg.data.blank? + next + elsif msg.oid.present? + yield new_blob(current_blob_data) if current_blob_data + + current_blob_data = msg.to_h.slice(:oid, :path, :size, :revision, :mode) + current_blob_data[:data] = msg.data.dup + else + current_blob_data[:data] << msg.data + end + end + end + + yield new_blob(current_blob_data) if current_blob_data + end + + private + + def new_blob(blob_data) + Gitlab::Git::Blob.new( + id: blob_data[:oid], + mode: blob_data[:mode].to_s(8), + name: File.basename(blob_data[:path]), + path: blob_data[:path], + size: blob_data[:size], + commit_id: blob_data[:revision], + data: blob_data[:data], + binary: Gitlab::Git::Blob.binary?(blob_data[:data]) + ) + end + end + end +end diff --git a/spec/lib/gitlab/git/blob_spec.rb b/spec/lib/gitlab/git/blob_spec.rb index 8ac960133c5..59e9e1cc94c 100644 --- a/spec/lib/gitlab/git/blob_spec.rb +++ b/spec/lib/gitlab/git/blob_spec.rb @@ -178,67 +178,77 @@ describe Gitlab::Git::Blob, seed_helper: true do end describe '.batch' do - let(:blob_references) do - [ - [SeedRepo::Commit::ID, "files/ruby/popen.rb"], - [SeedRepo::Commit::ID, 'six'] - ] - end + shared_examples 'loading blobs in batch' do + let(:blob_references) do + [ + [SeedRepo::Commit::ID, "files/ruby/popen.rb"], + [SeedRepo::Commit::ID, 'six'] + ] + end - subject { described_class.batch(repository, blob_references) } + subject { described_class.batch(repository, blob_references) } - it { expect(subject.size).to eq(blob_references.size) } + it { expect(subject.size).to eq(blob_references.size) } - context 'first blob' do - let(:blob) { subject[0] } + context 'first blob' do + let(:blob) { subject[0] } - it { expect(blob.id).to eq(SeedRepo::RubyBlob::ID) } - it { expect(blob.name).to eq(SeedRepo::RubyBlob::NAME) } - it { expect(blob.path).to eq("files/ruby/popen.rb") } - it { expect(blob.commit_id).to eq(SeedRepo::Commit::ID) } - it { expect(blob.data[0..10]).to eq(SeedRepo::RubyBlob::CONTENT[0..10]) } - it { expect(blob.size).to eq(669) } - it { expect(blob.mode).to eq("100644") } - end + it { expect(blob.id).to eq(SeedRepo::RubyBlob::ID) } + it { expect(blob.name).to eq(SeedRepo::RubyBlob::NAME) } + it { expect(blob.path).to eq("files/ruby/popen.rb") } + it { expect(blob.commit_id).to eq(SeedRepo::Commit::ID) } + it { expect(blob.data[0..10]).to eq(SeedRepo::RubyBlob::CONTENT[0..10]) } + it { expect(blob.size).to eq(669) } + it { expect(blob.mode).to eq("100644") } + end - context 'second blob' do - let(:blob) { subject[1] } + context 'second blob' do + let(:blob) { subject[1] } - it { expect(blob.id).to eq('409f37c4f05865e4fb208c771485f211a22c4c2d') } - it { expect(blob.data).to eq('') } - it 'does not mark the blob as binary' do - expect(blob).not_to be_binary + it { expect(blob.id).to eq('409f37c4f05865e4fb208c771485f211a22c4c2d') } + it { expect(blob.data).to eq('') } + it 'does not mark the blob as binary' do + expect(blob).not_to be_binary + end end - end - context 'limiting' do - subject { described_class.batch(repository, blob_references, blob_size_limit: blob_size_limit) } + context 'limiting' do + subject { described_class.batch(repository, blob_references, blob_size_limit: blob_size_limit) } - context 'positive' do - let(:blob_size_limit) { 10 } + context 'positive' do + let(:blob_size_limit) { 10 } - it { expect(subject.first.data.size).to eq(10) } - end + it { expect(subject.first.data.size).to eq(10) } + end - context 'zero' do - let(:blob_size_limit) { 0 } + context 'zero' do + let(:blob_size_limit) { 0 } - it 'only loads the metadata' do - expect(subject.first.size).not_to be(0) - expect(subject.first.data).to eq('') + it 'only loads the metadata' do + expect(subject.first.size).not_to be(0) + expect(subject.first.data).to eq('') + end end - end - context 'negative' do - let(:blob_size_limit) { -1 } + context 'negative' do + let(:blob_size_limit) { -1 } - it 'ignores MAX_DATA_DISPLAY_SIZE' do - stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100) + it 'ignores MAX_DATA_DISPLAY_SIZE' do + stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100) - expect(subject.first.data.size).to eq(669) + expect(subject.first.data.size).to eq(669) + end end end end + + context 'when Gitaly list_blobs_by_sha_path feature is enabled' do + it_behaves_like 'loading blobs in batch' + end + + context 'when Gitaly list_blobs_by_sha_path feature is disabled', :disable_gitaly do + it_behaves_like 'loading blobs in batch' + end end describe '.batch_lfs_pointers' do diff --git a/spec/lib/gitlab/gitaly_client/blobs_stitcher_spec.rb b/spec/lib/gitlab/gitaly_client/blobs_stitcher_spec.rb new file mode 100644 index 00000000000..9db710e759e --- /dev/null +++ b/spec/lib/gitlab/gitaly_client/blobs_stitcher_spec.rb @@ -0,0 +1,36 @@ +require 'spec_helper' + +describe Gitlab::GitalyClient::BlobsStitcher do + describe 'enumeration' do + it 'combines segregated blob messages together' do + messages = [ + OpenStruct.new(oid: 'abcdef1', path: 'path/to/file', size: 1642, revision: 'f00ba7', mode: 0100644, data: "first-line\n"), + OpenStruct.new(oid: '', data: 'second-line'), + OpenStruct.new(oid: '', data: '', revision: 'f00ba7', path: 'path/to/non-existent/file'), + OpenStruct.new(oid: 'abcdef2', path: 'path/to/another-file', size: 2461, revision: 'f00ba8', mode: 0100644, data: "GIF87a\x90\x01".b) + ] + + blobs = described_class.new(messages).to_a + + expect(blobs.size).to be(2) + + expect(blobs[0].id).to eq('abcdef1') + expect(blobs[0].mode).to eq('100644') + expect(blobs[0].name).to eq('file') + expect(blobs[0].path).to eq('path/to/file') + expect(blobs[0].size).to eq(1642) + expect(blobs[0].commit_id).to eq('f00ba7') + expect(blobs[0].data).to eq("first-line\nsecond-line") + expect(blobs[0].binary?).to be false + + expect(blobs[1].id).to eq('abcdef2') + expect(blobs[1].mode).to eq('100644') + expect(blobs[1].name).to eq('another-file') + expect(blobs[1].path).to eq('path/to/another-file') + expect(blobs[1].size).to eq(2461) + expect(blobs[1].commit_id).to eq('f00ba8') + expect(blobs[1].data).to eq("GIF87a\x90\x01".b) + expect(blobs[1].binary?).to be true + end + end +end -- cgit v1.2.1