summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAhmad Sherif <me@ahmadsherif.com>2018-02-01 20:56:41 +0100
committerAhmad Sherif <me@ahmadsherif.com>2018-02-05 17:38:07 +0100
commit2964d5222aa1ce4447561bf5855eef00adf8282e (patch)
tree416f4159fcf888eedd1e042754e679984c0bca3c
parentbfb32b46327576f5b5d4ff3d42198ba4601f1830 (diff)
downloadgitlab-ce-feature/migrate-blob-batch-to-gitaly.tar.gz
Migrate Git::Blob.batch to Gitalyfeature/migrate-blob-batch-to-gitaly
Closes gitaly#985
-rw-r--r--GITALY_SERVER_VERSION2
-rw-r--r--Gemfile2
-rw-r--r--Gemfile.lock4
-rw-r--r--lib/gitlab/git/blob.rb6
-rw-r--r--lib/gitlab/gitaly_client/blob_service.rb24
-rw-r--r--lib/gitlab/gitaly_client/blobs_stitcher.rb47
-rw-r--r--spec/lib/gitlab/git/blob_spec.rb94
-rw-r--r--spec/lib/gitlab/gitaly_client/blobs_stitcher_spec.rb36
8 files changed, 164 insertions, 51 deletions
diff --git a/GITALY_SERVER_VERSION b/GITALY_SERVER_VERSION
index bd14e8533ef..870aec4a7ca 100644
--- a/GITALY_SERVER_VERSION
+++ b/GITALY_SERVER_VERSION
@@ -1 +1 @@
-0.78.0
+0.79.0
diff --git a/Gemfile b/Gemfile
index c8e7a970769..c19b526133d 100644
--- a/Gemfile
+++ b/Gemfile
@@ -406,7 +406,7 @@ group :ed25519 do
end
# Gitaly GRPC client
-gem 'gitaly-proto', '~> 0.83.0', require: 'gitaly'
+gem 'gitaly-proto', '~> 0.84.0', require: 'gitaly'
gem 'toml-rb', '~> 0.3.15', require: false
diff --git a/Gemfile.lock b/Gemfile.lock
index cf9f160499d..ae2b79a16b5 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -285,7 +285,7 @@ GEM
po_to_json (>= 1.0.0)
rails (>= 3.2.0)
gherkin-ruby (0.3.2)
- gitaly-proto (0.83.0)
+ gitaly-proto (0.84.0)
google-protobuf (~> 3.1)
grpc (~> 1.0)
github-linguist (4.7.6)
@@ -1056,7 +1056,7 @@ DEPENDENCIES
gettext (~> 3.2.2)
gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.2.0)
- gitaly-proto (~> 0.83.0)
+ gitaly-proto (~> 0.84.0)
github-linguist (~> 4.7.0)
gitlab-flowdock-git-hook (~> 1.0.1)
gitlab-markup (~> 1.6.2)
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
index 4828301dbb9..b2fca2c16de 100644
--- a/lib/gitlab/git/blob.rb
+++ b/lib/gitlab/git/blob.rb
@@ -53,11 +53,7 @@ module Gitlab
def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE)
Gitlab::GitalyClient.migrate(:list_blobs_by_sha_path) do |is_enabled|
if is_enabled
- Gitlab::GitalyClient.allow_n_plus_1_calls do
- blob_references.map do |sha, path|
- find_by_gitaly(repository, sha, path, limit: blob_size_limit)
- end
- end
+ repository.gitaly_blob_client.get_blobs(blob_references, blob_size_limit).to_a
else
blob_references.map do |sha, path|
find_by_rugged(repository, sha, path, limit: blob_size_limit)
diff --git a/lib/gitlab/gitaly_client/blob_service.rb b/lib/gitlab/gitaly_client/blob_service.rb
index d70a1a7665e..e9f6808e465 100644
--- a/lib/gitlab/gitaly_client/blob_service.rb
+++ b/lib/gitlab/gitaly_client/blob_service.rb
@@ -1,6 +1,8 @@
module Gitlab
module GitalyClient
class BlobService
+ include Gitlab::EncodingHelper
+
def initialize(repository)
@gitaly_repo = repository.gitaly_repository
end
@@ -54,6 +56,28 @@ module Gitlab
end
end
end
+
+ def get_blobs(revision_paths, limit = -1)
+ return [] if revision_paths.empty?
+
+ request = Gitaly::GetBlobsRequest.new(
+ repository: @gitaly_repo,
+ revision_paths: revision_paths.map do |rev, path|
+ Gitaly::GetBlobsRequest::RevisionPath.new(revision: rev, path: encode_binary(path))
+ end,
+ limit: limit
+ )
+
+ response = GitalyClient.call(
+ @gitaly_repo.storage_name,
+ :blob_service,
+ :get_blobs,
+ request,
+ timeout: GitalyClient.default_timeout
+ )
+
+ GitalyClient::BlobsStitcher.new(response)
+ end
end
end
end
diff --git a/lib/gitlab/gitaly_client/blobs_stitcher.rb b/lib/gitlab/gitaly_client/blobs_stitcher.rb
new file mode 100644
index 00000000000..5ca592ff812
--- /dev/null
+++ b/lib/gitlab/gitaly_client/blobs_stitcher.rb
@@ -0,0 +1,47 @@
+module Gitlab
+ module GitalyClient
+ class BlobsStitcher
+ include Enumerable
+
+ def initialize(rpc_response)
+ @rpc_response = rpc_response
+ end
+
+ def each
+ current_blob_data = nil
+
+ @rpc_response.each do |msg|
+ begin
+ if msg.oid.blank? && msg.data.blank?
+ next
+ elsif msg.oid.present?
+ yield new_blob(current_blob_data) if current_blob_data
+
+ current_blob_data = msg.to_h.slice(:oid, :path, :size, :revision, :mode)
+ current_blob_data[:data] = msg.data.dup
+ else
+ current_blob_data[:data] << msg.data
+ end
+ end
+ end
+
+ yield new_blob(current_blob_data) if current_blob_data
+ end
+
+ private
+
+ def new_blob(blob_data)
+ Gitlab::Git::Blob.new(
+ id: blob_data[:oid],
+ mode: blob_data[:mode].to_s(8),
+ name: File.basename(blob_data[:path]),
+ path: blob_data[:path],
+ size: blob_data[:size],
+ commit_id: blob_data[:revision],
+ data: blob_data[:data],
+ binary: Gitlab::Git::Blob.binary?(blob_data[:data])
+ )
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/git/blob_spec.rb b/spec/lib/gitlab/git/blob_spec.rb
index 8ac960133c5..59e9e1cc94c 100644
--- a/spec/lib/gitlab/git/blob_spec.rb
+++ b/spec/lib/gitlab/git/blob_spec.rb
@@ -178,67 +178,77 @@ describe Gitlab::Git::Blob, seed_helper: true do
end
describe '.batch' do
- let(:blob_references) do
- [
- [SeedRepo::Commit::ID, "files/ruby/popen.rb"],
- [SeedRepo::Commit::ID, 'six']
- ]
- end
+ shared_examples 'loading blobs in batch' do
+ let(:blob_references) do
+ [
+ [SeedRepo::Commit::ID, "files/ruby/popen.rb"],
+ [SeedRepo::Commit::ID, 'six']
+ ]
+ end
- subject { described_class.batch(repository, blob_references) }
+ subject { described_class.batch(repository, blob_references) }
- it { expect(subject.size).to eq(blob_references.size) }
+ it { expect(subject.size).to eq(blob_references.size) }
- context 'first blob' do
- let(:blob) { subject[0] }
+ context 'first blob' do
+ let(:blob) { subject[0] }
- it { expect(blob.id).to eq(SeedRepo::RubyBlob::ID) }
- it { expect(blob.name).to eq(SeedRepo::RubyBlob::NAME) }
- it { expect(blob.path).to eq("files/ruby/popen.rb") }
- it { expect(blob.commit_id).to eq(SeedRepo::Commit::ID) }
- it { expect(blob.data[0..10]).to eq(SeedRepo::RubyBlob::CONTENT[0..10]) }
- it { expect(blob.size).to eq(669) }
- it { expect(blob.mode).to eq("100644") }
- end
+ it { expect(blob.id).to eq(SeedRepo::RubyBlob::ID) }
+ it { expect(blob.name).to eq(SeedRepo::RubyBlob::NAME) }
+ it { expect(blob.path).to eq("files/ruby/popen.rb") }
+ it { expect(blob.commit_id).to eq(SeedRepo::Commit::ID) }
+ it { expect(blob.data[0..10]).to eq(SeedRepo::RubyBlob::CONTENT[0..10]) }
+ it { expect(blob.size).to eq(669) }
+ it { expect(blob.mode).to eq("100644") }
+ end
- context 'second blob' do
- let(:blob) { subject[1] }
+ context 'second blob' do
+ let(:blob) { subject[1] }
- it { expect(blob.id).to eq('409f37c4f05865e4fb208c771485f211a22c4c2d') }
- it { expect(blob.data).to eq('') }
- it 'does not mark the blob as binary' do
- expect(blob).not_to be_binary
+ it { expect(blob.id).to eq('409f37c4f05865e4fb208c771485f211a22c4c2d') }
+ it { expect(blob.data).to eq('') }
+ it 'does not mark the blob as binary' do
+ expect(blob).not_to be_binary
+ end
end
- end
- context 'limiting' do
- subject { described_class.batch(repository, blob_references, blob_size_limit: blob_size_limit) }
+ context 'limiting' do
+ subject { described_class.batch(repository, blob_references, blob_size_limit: blob_size_limit) }
- context 'positive' do
- let(:blob_size_limit) { 10 }
+ context 'positive' do
+ let(:blob_size_limit) { 10 }
- it { expect(subject.first.data.size).to eq(10) }
- end
+ it { expect(subject.first.data.size).to eq(10) }
+ end
- context 'zero' do
- let(:blob_size_limit) { 0 }
+ context 'zero' do
+ let(:blob_size_limit) { 0 }
- it 'only loads the metadata' do
- expect(subject.first.size).not_to be(0)
- expect(subject.first.data).to eq('')
+ it 'only loads the metadata' do
+ expect(subject.first.size).not_to be(0)
+ expect(subject.first.data).to eq('')
+ end
end
- end
- context 'negative' do
- let(:blob_size_limit) { -1 }
+ context 'negative' do
+ let(:blob_size_limit) { -1 }
- it 'ignores MAX_DATA_DISPLAY_SIZE' do
- stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100)
+ it 'ignores MAX_DATA_DISPLAY_SIZE' do
+ stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100)
- expect(subject.first.data.size).to eq(669)
+ expect(subject.first.data.size).to eq(669)
+ end
end
end
end
+
+ context 'when Gitaly list_blobs_by_sha_path feature is enabled' do
+ it_behaves_like 'loading blobs in batch'
+ end
+
+ context 'when Gitaly list_blobs_by_sha_path feature is disabled', :disable_gitaly do
+ it_behaves_like 'loading blobs in batch'
+ end
end
describe '.batch_lfs_pointers' do
diff --git a/spec/lib/gitlab/gitaly_client/blobs_stitcher_spec.rb b/spec/lib/gitlab/gitaly_client/blobs_stitcher_spec.rb
new file mode 100644
index 00000000000..9db710e759e
--- /dev/null
+++ b/spec/lib/gitlab/gitaly_client/blobs_stitcher_spec.rb
@@ -0,0 +1,36 @@
+require 'spec_helper'
+
+describe Gitlab::GitalyClient::BlobsStitcher do
+ describe 'enumeration' do
+ it 'combines segregated blob messages together' do
+ messages = [
+ OpenStruct.new(oid: 'abcdef1', path: 'path/to/file', size: 1642, revision: 'f00ba7', mode: 0100644, data: "first-line\n"),
+ OpenStruct.new(oid: '', data: 'second-line'),
+ OpenStruct.new(oid: '', data: '', revision: 'f00ba7', path: 'path/to/non-existent/file'),
+ OpenStruct.new(oid: 'abcdef2', path: 'path/to/another-file', size: 2461, revision: 'f00ba8', mode: 0100644, data: "GIF87a\x90\x01".b)
+ ]
+
+ blobs = described_class.new(messages).to_a
+
+ expect(blobs.size).to be(2)
+
+ expect(blobs[0].id).to eq('abcdef1')
+ expect(blobs[0].mode).to eq('100644')
+ expect(blobs[0].name).to eq('file')
+ expect(blobs[0].path).to eq('path/to/file')
+ expect(blobs[0].size).to eq(1642)
+ expect(blobs[0].commit_id).to eq('f00ba7')
+ expect(blobs[0].data).to eq("first-line\nsecond-line")
+ expect(blobs[0].binary?).to be false
+
+ expect(blobs[1].id).to eq('abcdef2')
+ expect(blobs[1].mode).to eq('100644')
+ expect(blobs[1].name).to eq('another-file')
+ expect(blobs[1].path).to eq('path/to/another-file')
+ expect(blobs[1].size).to eq(2461)
+ expect(blobs[1].commit_id).to eq('f00ba8')
+ expect(blobs[1].data).to eq("GIF87a\x90\x01".b)
+ expect(blobs[1].binary?).to be true
+ end
+ end
+end