From 1885691b035a29948fa009bc505c59a97fb79ae6 Mon Sep 17 00:00:00 2001 From: Sean McGivern Date: Wed, 24 Apr 2019 14:13:32 +0100 Subject: Ensure that we only request blobs in one batch Blob.lazy adds a blob to a batch to load at a later point, using the BatchLoader library. Whenever any lazy blob's attributes are accessed, all lazy blobs requested to that point will be loaded. BatchLoader, the library we use for this, should only request items in a batch once. That is, if we have these batches: 1. a, b, c 2. d, e, f Then a, b, and c should only be requested in the first batch. But if you modify the list of items in the batch, then the second batch will request a, b, c, d, e, f, which is almost certainly not what we want! https://github.com/exAspArk/batch-loader/issues/44 is the upstream issue for this, but we can also solve this in our application by not modifying the arguments we're using inside a BatchLoader batch. --- .../fix-lazy-blobs-requesting-all-previous-blobs.yml | 6 ++++++ lib/gitlab/gitaly_client/blob_service.rb | 4 ++-- spec/models/blob_spec.rb | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 changelogs/unreleased/fix-lazy-blobs-requesting-all-previous-blobs.yml diff --git a/changelogs/unreleased/fix-lazy-blobs-requesting-all-previous-blobs.yml b/changelogs/unreleased/fix-lazy-blobs-requesting-all-previous-blobs.yml new file mode 100644 index 00000000000..58f5a9c943c --- /dev/null +++ b/changelogs/unreleased/fix-lazy-blobs-requesting-all-previous-blobs.yml @@ -0,0 +1,6 @@ +--- +title: Fix Blob.lazy always loading all previously-requested blobs when a new request + is made +merge_request: +author: +type: performance diff --git a/lib/gitlab/gitaly_client/blob_service.rb b/lib/gitlab/gitaly_client/blob_service.rb index 6b8e58e6199..8ccefb00d20 100644 --- a/lib/gitlab/gitaly_client/blob_service.rb +++ b/lib/gitlab/gitaly_client/blob_service.rb @@ -55,13 +55,13 @@ module Gitlab def get_blobs(revision_paths, limit = -1) return [] if revision_paths.empty? - revision_paths.map! do |rev, path| + request_revision_paths = revision_paths.map do |rev, path| Gitaly::GetBlobsRequest::RevisionPath.new(revision: rev, path: encode_binary(path)) end request = Gitaly::GetBlobsRequest.new( repository: @gitaly_repo, - revision_paths: revision_paths, + revision_paths: request_revision_paths, limit: limit ) diff --git a/spec/models/blob_spec.rb b/spec/models/blob_spec.rb index d0e1688cce3..8364293b908 100644 --- a/spec/models/blob_spec.rb +++ b/spec/models/blob_spec.rb @@ -43,6 +43,21 @@ describe Blob do changelog.id contributing.id end + + it 'does not include blobs from previous requests in later requests' do + changelog = described_class.lazy(project, commit_id, 'CHANGELOG') + contributing = described_class.lazy(same_project, commit_id, 'CONTRIBUTING.md') + + # Access property so the values are loaded + changelog.id + contributing.id + + readme = described_class.lazy(project, commit_id, 'README.md') + + expect(project.repository).to receive(:blobs_at).with([[commit_id, 'README.md']]).once.and_call_original + + readme.id + end end describe '#data' do -- cgit v1.2.1