From 437c8072e8a735cadad591d8593438e668b31fab Mon Sep 17 00:00:00 2001 From: Andrew Newdigate Date: Wed, 4 Oct 2017 13:55:21 +0100 Subject: Batch call experiment --- Gemfile | 2 ++ Gemfile.lock | 2 ++ app/controllers/projects/tree_controller.rb | 6 ++--- app/models/repository.rb | 41 +++++++++++++++++++++++++++++ app/serializers/blob_entity.rb | 2 +- app/serializers/tree_entity.rb | 2 +- config/initializers/batch_loader.rb | 1 + lib/gitlab/git/repository.rb | 29 ++++++++++++++++++++ 8 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 config/initializers/batch_loader.rb diff --git a/Gemfile b/Gemfile index 44c459c497f..7e0939b2647 100644 --- a/Gemfile +++ b/Gemfile @@ -409,3 +409,5 @@ gem 'flipper-active_record', '~> 0.10.2' # Structured logging gem 'lograge', '~> 0.5' gem 'grape_logging', '~> 1.7' + +gem 'batch-loader' diff --git a/Gemfile.lock b/Gemfile.lock index a0ad2716c01..e6aa502a6d3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -73,6 +73,7 @@ GEM thread_safe (~> 0.3, >= 0.3.1) babosa (1.0.2) base32 (0.3.2) + batch-loader (1.0.3) bcrypt (3.1.11) bcrypt_pbkdf (1.0.0) benchmark-ips (2.3.0) @@ -975,6 +976,7 @@ DEPENDENCIES awesome_print (~> 1.2.0) babosa (~> 1.0.2) base32 (~> 0.3.0) + batch-loader bcrypt_pbkdf (~> 1.0) benchmark-ips (~> 2.3.0) better_errors (~> 2.1.0) diff --git a/app/controllers/projects/tree_controller.rb b/app/controllers/projects/tree_controller.rb index f3719059f88..a097c3834b8 100644 --- a/app/controllers/projects/tree_controller.rb +++ b/app/controllers/projects/tree_controller.rb @@ -38,9 +38,9 @@ class Projects::TreeController < Projects::ApplicationController page_title @path.presence || _("Files"), @ref, @project.name_with_namespace # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/38261 - Gitlab::GitalyClient.allow_n_plus_1_calls do - render json: TreeSerializer.new(project: @project, repository: @repository, ref: @ref).represent(@tree) - end + serialized_tree = TreeSerializer.new(project: @project, repository: @repository, ref: @ref).represent(@tree) + + render json: serialized_tree end end end diff --git a/app/models/repository.rb b/app/models/repository.rb index d47dc9a05cd..4d77b57b8b9 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -1,4 +1,5 @@ require 'securerandom' +require 'batch-loader' class Repository REF_MERGE_REQUEST = 'merge-requests'.freeze @@ -624,6 +625,46 @@ class Repository end end + # TODO: Ruby may already offer a function like this + # otherwise move it somewhere else, ie a collection + # utils class + def hash_by_kv(array, key, value) + array.inject(Hash.new) do |memo, item| + k = item[key] + v = item[value] + + array = memo[k] + if array + array.push(v) + else + memo[k] = [v] + end + + memo + end + end + + def last_commit_for_path_lazy(sha, path) + BatchLoader.for({ sha: sha, path: path }).batch do |items, loader| + paths_by_sha = hash_by_kv(items, :sha, :path) + # Bulk fetch the paths for each SHA + last_commits_for_items = Hash.new + paths_by_sha.each do |for_sha, paths| + last_commits_for_sha = raw_repository.last_commit_for_paths(for_sha, paths) + + last_commits_for_sha.each do |result_path, last_commit| + last_commits_for_items[{ sha: for_sha, path: result_path }] = last_commit + end + end + + # Present the results to the loader + items.each do |item| + last_commit = last_commits_for_items[item] + loader.call(item, last_commit) + end + end + end + def last_commit_for_path(sha, path) raw_repository.gitaly_migrate(:last_commit_for_path) do |is_enabled| if is_enabled diff --git a/app/serializers/blob_entity.rb b/app/serializers/blob_entity.rb index 56f173e5a27..38e82ac93d4 100644 --- a/app/serializers/blob_entity.rb +++ b/app/serializers/blob_entity.rb @@ -4,7 +4,7 @@ class BlobEntity < Grape::Entity expose :id, :path, :name, :mode expose :last_commit do |blob| - request.project.repository.last_commit_for_path(blob.commit_id, blob.path) + request.project.repository.last_commit_for_path_lazy(blob.commit_id, blob.path) end expose :icon do |blob| diff --git a/app/serializers/tree_entity.rb b/app/serializers/tree_entity.rb index 555e5cf83bd..a16a91c2db9 100644 --- a/app/serializers/tree_entity.rb +++ b/app/serializers/tree_entity.rb @@ -4,7 +4,7 @@ class TreeEntity < Grape::Entity expose :id, :path, :name, :mode expose :last_commit do |tree| - request.project.repository.last_commit_for_path(tree.commit_id, tree.path) + request.project.repository.last_commit_for_path_lazy(tree.commit_id, tree.path) end expose :icon do |tree| diff --git a/config/initializers/batch_loader.rb b/config/initializers/batch_loader.rb new file mode 100644 index 00000000000..2e2256b0eb9 --- /dev/null +++ b/config/initializers/batch_loader.rb @@ -0,0 +1 @@ +Rails.application.config.middleware.use(BatchLoader::Middleware) diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index a76befe4b52..9d2634e2ef8 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -484,6 +484,35 @@ module Gitlab commits end + # A placeholder. This should be replaced with a Gitaly batch call + def last_commit_for_paths(sha, paths) + results = Hash.new + return results if paths.empty? + + path_set = Set.new paths + + walker = Rugged::Walker.new(rugged) + walker.sorting(Rugged::SORT_DATE) + + sha_from = sha_from_ref(sha) + walker.push(sha_from) + walker.each do |commit| + # TODO: handle directories too + diff = commit.diff(paths: path_set.to_a) + diff.each_delta do |delta| + delta_path = delta.new_file[:path] + if path_set.include?(delta_path) + results[delta_path] = commit.oid + path_set.delete(delta_path) + + return results if path_set.empty? + end + end + end + + results + end + # Counts the amount of commits between `from` and `to`. def count_commits_between(from, to) Commit.between(self, from, to).size -- cgit v1.2.1