From 5e20e448cec833cc10d2cc4c305a056e0f29ed83 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 1 Aug 2017 13:49:57 +0200 Subject: Add Gitlab::Git::Blob.batch method --- lib/gitlab/git/blob.rb | 137 ++++++++++++++++++++++----------------- spec/lib/gitlab/git/blob_spec.rb | 71 ++++++++++++++++++++ 2 files changed, 148 insertions(+), 60 deletions(-) diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb index db6cfc9671f..0b98be3a14f 100644 --- a/lib/gitlab/git/blob.rb +++ b/lib/gitlab/git/blob.rb @@ -20,66 +20,7 @@ module Gitlab if is_enabled find_by_gitaly(repository, sha, path) else - find_by_rugged(repository, sha, path) - end - end - end - - def find_by_gitaly(repository, sha, path) - path = path.sub(/\A\/*/, '') - path = '/' if path.empty? - name = File.basename(path) - entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, MAX_DATA_DISPLAY_SIZE) - return unless entry - - case entry.type - when :COMMIT - new( - id: entry.oid, - name: name, - size: 0, - data: '', - path: path, - commit_id: sha - ) - when :BLOB - new( - id: entry.oid, - name: name, - size: entry.size, - data: entry.data.dup, - mode: entry.mode.to_s(8), - path: path, - commit_id: sha, - binary: binary?(entry.data) - ) - end - end - - def find_by_rugged(repository, sha, path) - commit = repository.lookup(sha) - root_tree = commit.tree - - blob_entry = find_entry_by_path(repository, root_tree.oid, path) - - return nil unless blob_entry - - if blob_entry[:type] == :commit - submodule_blob(blob_entry, path, sha) - else - blob = repository.lookup(blob_entry[:oid]) - - if blob - new( - id: blob.oid, - name: blob_entry[:name], - size: blob.size, - data: blob.content(MAX_DATA_DISPLAY_SIZE), - mode: blob_entry[:filemode].to_s(8), - path: path, - commit_id: sha, - binary: blob.binary? - ) + find_by_rugged(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE) end end end @@ -109,6 +50,22 @@ module Gitlab detect && detect[:type] == :binary end + # Returns an array of Blob instances, specified in blob_references as + # [[commit_sha, path], [commit_sha, path], ...]. If limit < 0 then the + # full blob contents are returned. If limit >= 0 then each blob will + # contain no more than limit bytes in its data attribute. + # + # Keep in mind that this method may allocate a lot of memory. It is up + # to the caller to limit the number of blobs and/or the content limit + # for the individual blobs. + # + def batch(repository, blob_references, limit: nil) + limit ||= MAX_DATA_DISPLAY_SIZE + blob_references.map do |sha, path| + find_by_rugged(repository, sha, path, limit: limit) + end + end + private # Recursive search of blob id by path @@ -153,6 +110,66 @@ module Gitlab commit_id: sha ) end + + def find_by_gitaly(repository, sha, path) + path = path.sub(/\A\/*/, '') + path = '/' if path.empty? + name = File.basename(path) + entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, MAX_DATA_DISPLAY_SIZE) + return unless entry + + case entry.type + when :COMMIT + new( + id: entry.oid, + name: name, + size: 0, + data: '', + path: path, + commit_id: sha + ) + when :BLOB + new( + id: entry.oid, + name: name, + size: entry.size, + data: entry.data.dup, + mode: entry.mode.to_s(8), + path: path, + commit_id: sha, + binary: binary?(entry.data) + ) + end + end + + def find_by_rugged(repository, sha, path, limit:) + commit = repository.lookup(sha) + root_tree = commit.tree + + blob_entry = find_entry_by_path(repository, root_tree.oid, path) + + return nil unless blob_entry + + if blob_entry[:type] == :commit + submodule_blob(blob_entry, path, sha) + else + blob = repository.lookup(blob_entry[:oid]) + + if blob + new( + id: blob.oid, + name: blob_entry[:name], + size: blob.size, + # Rugged::Blob#content is expensive; don't call it if we don't have to. + data: limit.zero? ? '' : blob.content(limit), + mode: blob_entry[:filemode].to_s(8), + path: path, + commit_id: sha, + binary: blob.binary? + ) + end + end + end end def initialize(options) diff --git a/spec/lib/gitlab/git/blob_spec.rb b/spec/lib/gitlab/git/blob_spec.rb index 3c784eda4f8..ed2a781b172 100644 --- a/spec/lib/gitlab/git/blob_spec.rb +++ b/spec/lib/gitlab/git/blob_spec.rb @@ -146,6 +146,77 @@ describe Gitlab::Git::Blob, seed_helper: true do end end + describe '.batch' do + let(:blob_references) do + [ + [SeedRepo::Commit::ID, "files/ruby/popen.rb"], + [SeedRepo::Commit::ID, 'six'] + ] + end + + subject { described_class.batch(repository, blob_references) } + + it { expect(subject.size).to eq(blob_references.size) } + + context 'first blob' do + let(:blob) { subject[0] } + + it { expect(blob.id).to eq(SeedRepo::RubyBlob::ID) } + it { expect(blob.name).to eq(SeedRepo::RubyBlob::NAME) } + it { expect(blob.path).to eq("files/ruby/popen.rb") } + it { expect(blob.commit_id).to eq(SeedRepo::Commit::ID) } + it { expect(blob.data[0..10]).to eq(SeedRepo::RubyBlob::CONTENT[0..10]) } + it { expect(blob.size).to eq(669) } + it { expect(blob.mode).to eq("100644") } + end + + context 'second blob' do + let(:blob) { subject[1] } + + it { expect(blob.id).to eq('409f37c4f05865e4fb208c771485f211a22c4c2d') } + it { expect(blob.data).to eq('') } + it 'does not mark the blob as binary' do + expect(blob).not_to be_binary + end + end + + context 'limiting' do + subject { described_class.batch(repository, blob_references, limit: limit) } + + context 'default' do + let(:limit) { nil } + + it 'limits to MAX_DATA_DISPLAY_SIZE' do + stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100) + + expect(subject.first.data.size).to eq(100) + end + end + + context 'positive' do + let(:limit) { 10 } + + it { expect(subject.first.data.size).to eq(10) } + end + + context 'zero' do + let(:limit) { 0 } + + it { expect(subject.first.data).to eq('') } + end + + context 'negative' do + let(:limit) { -1 } + + it 'ignores MAX_DATA_DISPLAY_SIZE' do + stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100) + + expect(subject.first.data.size).to eq(669) + end + end + end + end + describe 'encoding' do context 'file with russian text' do let(:blob) { Gitlab::Git::Blob.find(repository, SeedRepo::Commit::ID, "encoding/russian.rb") } -- cgit v1.2.1 From 29a1c5a126e7289dcaa710cc0933057bf274aff2 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Mon, 7 Aug 2017 16:26:50 +0200 Subject: Rename 'limit' to 'blob_size_limit' --- lib/gitlab/git/blob.rb | 13 ++++++------- spec/lib/gitlab/git/blob_spec.rb | 10 +++++----- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb index 0b98be3a14f..77b81d2d437 100644 --- a/lib/gitlab/git/blob.rb +++ b/lib/gitlab/git/blob.rb @@ -51,18 +51,17 @@ module Gitlab end # Returns an array of Blob instances, specified in blob_references as - # [[commit_sha, path], [commit_sha, path], ...]. If limit < 0 then the - # full blob contents are returned. If limit >= 0 then each blob will + # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the + # full blob contents are returned. If blob_size_limit >= 0 then each blob will # contain no more than limit bytes in its data attribute. # # Keep in mind that this method may allocate a lot of memory. It is up - # to the caller to limit the number of blobs and/or the content limit - # for the individual blobs. + # to the caller to limit the number of blobs and blob_size_limit. # - def batch(repository, blob_references, limit: nil) - limit ||= MAX_DATA_DISPLAY_SIZE + def batch(repository, blob_references, blob_size_limit: nil) + blob_size_limit ||= MAX_DATA_DISPLAY_SIZE blob_references.map do |sha, path| - find_by_rugged(repository, sha, path, limit: limit) + find_by_rugged(repository, sha, path, limit: blob_size_limit) end end diff --git a/spec/lib/gitlab/git/blob_spec.rb b/spec/lib/gitlab/git/blob_spec.rb index ed2a781b172..dd4bec653f2 100644 --- a/spec/lib/gitlab/git/blob_spec.rb +++ b/spec/lib/gitlab/git/blob_spec.rb @@ -181,10 +181,10 @@ describe Gitlab::Git::Blob, seed_helper: true do end context 'limiting' do - subject { described_class.batch(repository, blob_references, limit: limit) } + subject { described_class.batch(repository, blob_references, blob_size_limit: blob_size_limit) } context 'default' do - let(:limit) { nil } + let(:blob_size_limit) { nil } it 'limits to MAX_DATA_DISPLAY_SIZE' do stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100) @@ -194,19 +194,19 @@ describe Gitlab::Git::Blob, seed_helper: true do end context 'positive' do - let(:limit) { 10 } + let(:blob_size_limit) { 10 } it { expect(subject.first.data.size).to eq(10) } end context 'zero' do - let(:limit) { 0 } + let(:blob_size_limit) { 0 } it { expect(subject.first.data).to eq('') } end context 'negative' do - let(:limit) { -1 } + let(:blob_size_limit) { -1 } it 'ignores MAX_DATA_DISPLAY_SIZE' do stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100) -- cgit v1.2.1