summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Cai <jcai@gitlab.com>2019-08-22 15:57:18 -0700
committerJohn Cai <jcai@gitlab.com>2019-09-12 10:03:15 -0700
commit890d4ba6e771b3dd174f84af5c144c73dd15a086 (patch)
tree2cd84d36d99e129f04154489507eff1cda099f8f
parent1ffa66effbc9272ac9755ba29fab2fd1b3d93db5 (diff)
downloadgitlab-ce-jc-optimize-uri-type.tar.gz
Use GetBlobs RPC for uri typejc-optimize-uri-type
Call a modified GetBlobs RPC that provides the tree entry type. Modify relative_link_filter to get all the tree entry types instead of calling an RPC per entry.
-rw-r--r--GITALY_SERVER_VERSION2
-rw-r--r--Gemfile2
-rw-r--r--Gemfile.lock4
-rw-r--r--changelogs/unreleased/jc-optimize-uri-type.yml5
-rw-r--r--lib/banzai/filter/relative_link_filter.rb108
-rw-r--r--lib/gitlab/gitaly_client/blob_service.rb36
-rw-r--r--spec/lib/banzai/filter/relative_link_filter_spec.rb28
7 files changed, 166 insertions, 19 deletions
diff --git a/GITALY_SERVER_VERSION b/GITALY_SERVER_VERSION
index 91951fd8ad7..76d05362056 100644
--- a/GITALY_SERVER_VERSION
+++ b/GITALY_SERVER_VERSION
@@ -1 +1 @@
-1.61.0
+1.62.0
diff --git a/Gemfile b/Gemfile
index d080ef67e3a..2da6c45d6ba 100644
--- a/Gemfile
+++ b/Gemfile
@@ -428,7 +428,7 @@ group :ed25519 do
end
# Gitaly GRPC protocol definitions
-gem 'gitaly', '~> 1.58.0'
+gem 'gitaly', '~> 1.62.0'
gem 'grpc', '~> 1.19.0'
diff --git a/Gemfile.lock b/Gemfile.lock
index 515e1795671..492f517e829 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -332,7 +332,7 @@ GEM
po_to_json (>= 1.0.0)
rails (>= 3.2.0)
git (1.5.0)
- gitaly (1.58.0)
+ gitaly (1.62.0)
grpc (~> 1.0)
github-markup (1.7.0)
gitlab-labkit (0.5.2)
@@ -1126,7 +1126,7 @@ DEPENDENCIES
gettext (~> 3.2.2)
gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.3)
- gitaly (~> 1.58.0)
+ gitaly (~> 1.62.0)
github-markup (~> 1.7.0)
gitlab-labkit (~> 0.5)
gitlab-markup (~> 1.7.0)
diff --git a/changelogs/unreleased/jc-optimize-uri-type.yml b/changelogs/unreleased/jc-optimize-uri-type.yml
new file mode 100644
index 00000000000..34bd3a0989f
--- /dev/null
+++ b/changelogs/unreleased/jc-optimize-uri-type.yml
@@ -0,0 +1,5 @@
+---
+title: Use GetBlobs RPC for uri type
+merge_request: 32167
+author:
+type: performance
diff --git a/lib/banzai/filter/relative_link_filter.rb b/lib/banzai/filter/relative_link_filter.rb
index 2b734db5cfb..d0317aa5ff5 100644
--- a/lib/banzai/filter/relative_link_filter.rb
+++ b/lib/banzai/filter/relative_link_filter.rb
@@ -20,16 +20,12 @@ module Banzai
def call
return doc if context[:system_note]
- @uri_types = {}
clear_memoization(:linkable_files)
- doc.search('a:not(.gfm)').each do |el|
- process_link_attr el.attribute('href')
- end
+ load_uri_types
- doc.css('img, video').each do |el|
- process_link_attr el.attribute('src')
- process_link_attr el.attribute('data-src')
+ linkable_attributes.each do |attr|
+ process_link_attr(attr)
end
doc
@@ -37,12 +33,77 @@ module Banzai
protected
+ def load_uri_types
+ return unless linkable_files?
+ return {} unless repository
+
+ clear_memoization(:linkable_attributes)
+
+ @uri_types = request_path.present? ? get_uri_types([request_path]) : {}
+
+ paths = linkable_attributes.flat_map do |attr|
+ [get_uri(attr).to_s, relative_file_path(get_uri(attr))]
+ end
+
+ paths.reject!(&:blank?)
+ paths.uniq!
+
+ @uri_types.merge!(get_uri_types(paths))
+ end
+
def linkable_files?
strong_memoize(:linkable_files) do
context[:project_wiki].nil? && repository.try(:exists?) && !repository.empty?
end
end
+ def linkable_attributes
+ strong_memoize(:linkable_attributes) do
+ attrs = []
+
+ attrs += doc.search('a:not(.gfm)').map do |el|
+ el.attribute('href')
+ end
+
+ attrs += doc.search('img, video').flat_map do |el|
+ [el.attribute('src'), el.attribute('data-src')]
+ end
+
+ attrs.reject do |attr|
+ attr.blank? || attr.value.start_with?('//')
+ end
+ end
+ end
+
+ def get_uri_types(paths)
+ revision_paths = paths.collect do |path|
+ [current_commit.sha, path.chomp("/")]
+ end
+
+ uri_types = Gitlab::GitalyClient::BlobService.new(repository).get_blob_types(revision_paths, 1)
+
+ uri_types.each do |name, type|
+ if type == :blob
+ blob = ::Blob.decorate(Gitlab::Git::Blob.new(name: name), project)
+ uri_types[name] = :raw if blob.image? || blob.video?
+ elsif type.nil?
+ uri_types[name] = legacy_uri_type(name)
+ end
+ end
+
+ uri_types
+ end
+
+ def get_uri(html_attr)
+ return if html_attr.blank?
+ return if html_attr.value.start_with?('//')
+
+ uri = URI(html_attr.value)
+
+ uri if uri.relative? && uri.path.present?
+ rescue URI::Error, Addressable::URI::InvalidURIError
+ end
+
def process_link_attr(html_attr)
return if html_attr.blank?
return if html_attr.value.start_with?('//')
@@ -81,6 +142,7 @@ module Banzai
def process_link_to_repository_attr(html_attr)
uri = URI(html_attr.value)
+
if uri.relative? && uri.path.present?
html_attr.value = rebuild_relative_uri(uri).to_s
end
@@ -89,7 +151,7 @@ module Banzai
end
def rebuild_relative_uri(uri)
- file_path = relative_file_path(uri)
+ file_path = nested_file_path_if_exists(uri)
uri.path = [
relative_url_root,
@@ -102,13 +164,29 @@ module Banzai
uri
end
- def relative_file_path(uri)
- path = Addressable::URI.unescape(uri.path).delete("\0")
- request_path = Addressable::URI.unescape(context[:requested_path])
- nested_path = build_relative_path(path, request_path)
+ def nested_file_path_if_exists(uri)
+ path = cleaned_file_path(uri)
+ nested_path = relative_file_path(uri)
+
file_exists?(nested_path) ? nested_path : path
end
+ def cleaned_file_path(uri)
+ Addressable::URI.unescape(uri.path).delete("\0").chomp("/")
+ end
+
+ def relative_file_path(uri)
+ return if uri.nil?
+
+ build_relative_path(cleaned_file_path(uri), request_path)
+ end
+
+ def request_path
+ return unless context[:requested_path]
+
+ Addressable::URI.unescape(context[:requested_path]).chomp("/")
+ end
+
# Convert a relative path into its correct location based on the currently
# requested path
#
@@ -152,13 +230,17 @@ module Banzai
path.present? && !!uri_type(path)
end
- def uri_type(path)
+ def legacy_uri_type(path)
# https://gitlab.com/gitlab-org/gitlab-ce/issues/58657
Gitlab::GitalyClient.allow_n_plus_1_calls do
@uri_types[path] ||= current_commit.uri_type(path)
end
end
+ def uri_type(path)
+ @uri_types[path]
+ end
+
def current_commit
@current_commit ||= context[:commit] || repository.commit(ref)
end
diff --git a/lib/gitlab/gitaly_client/blob_service.rb b/lib/gitlab/gitaly_client/blob_service.rb
index 8ccefb00d20..0d6dcb6fda5 100644
--- a/lib/gitlab/gitaly_client/blob_service.rb
+++ b/lib/gitlab/gitaly_client/blob_service.rb
@@ -76,6 +76,30 @@ module Gitlab
GitalyClient::BlobsStitcher.new(response)
end
+ def get_blob_types(revision_paths, limit = -1)
+ return {} if revision_paths.empty?
+
+ request_revision_paths = revision_paths.map do |rev, path|
+ Gitaly::GetBlobsRequest::RevisionPath.new(revision: rev, path: encode_binary(path))
+ end
+
+ request = Gitaly::GetBlobsRequest.new(
+ repository: @gitaly_repo,
+ revision_paths: request_revision_paths,
+ limit: limit
+ )
+
+ response = GitalyClient.call(
+ @gitaly_repo.storage_name,
+ :blob_service,
+ :get_blobs,
+ request,
+ timeout: GitalyClient.fast_timeout
+ )
+
+ map_blob_types(response)
+ end
+
def get_new_lfs_pointers(revision, limit, not_in, dynamic_timeout = nil)
request = Gitaly::GetNewLFSPointersRequest.new(
repository: @gitaly_repo,
@@ -132,6 +156,18 @@ module Gitlab
end
end
end
+
+ def map_blob_types(response)
+ types = {}
+ response.each do |msg|
+ next if msg.type.nil?
+ next if msg.type == :UNKNOWN
+
+ types[msg.path.dup.force_encoding('utf-8')] = msg.type.downcase
+ end
+
+ types
+ end
end
end
end
diff --git a/spec/lib/banzai/filter/relative_link_filter_spec.rb b/spec/lib/banzai/filter/relative_link_filter_spec.rb
index f8b3748c663..8e55f12ddc5 100644
--- a/spec/lib/banzai/filter/relative_link_filter_spec.rb
+++ b/spec/lib/banzai/filter/relative_link_filter_spec.rb
@@ -3,6 +3,9 @@
require 'spec_helper'
describe Banzai::Filter::RelativeLinkFilter do
+ include GitHelpers
+ include RepoHelpers
+
def filter(doc, contexts = {})
contexts.reverse_merge!({
commit: commit,
@@ -34,6 +37,12 @@ describe Banzai::Filter::RelativeLinkFilter do
%(<div>#{element}</div>)
end
+ def allow_gitaly_n_plus_1
+ Gitlab::GitalyClient.allow_n_plus_1_calls do
+ yield
+ end
+ end
+
let(:project) { create(:project, :repository, :public) }
let(:user) { create(:user) }
let(:group) { nil }
@@ -44,6 +53,19 @@ describe Banzai::Filter::RelativeLinkFilter do
let(:requested_path) { '/' }
let(:only_path) { true }
+ it 'does not trigger a gitaly n+1', :request_store do
+ raw_doc = ""
+
+ allow_gitaly_n_plus_1 do
+ 30.times do |i|
+ create_file_in_repo(project, ref, ref, "new_file_#{i}", "x" )
+ raw_doc += link("new_file_#{i}")
+ end
+ end
+
+ expect { filter(raw_doc) }.to change { Gitlab::GitalyClient.get_request_count }.by(2)
+ end
+
shared_examples :preserve_unchanged do
it 'does not modify any relative URL in anchor' do
doc = filter(link('README.md'))
@@ -244,7 +266,8 @@ describe Banzai::Filter::RelativeLinkFilter do
end
context 'when ref name contains special chars' do
- let(:ref) {'mark#\'@],+;-._/#@!$&()+down'}
+ let(:ref) { 'mark#\'@],+;-._/#@!$&()+down' }
+ let(:path) { 'files/images/logo-black.png' }
it 'correctly escapes the ref' do
# Addressable won't escape the '#', so we do this manually
@@ -252,8 +275,9 @@ describe Banzai::Filter::RelativeLinkFilter do
# Stub this method so the branch doesn't actually need to be in the repo
allow_any_instance_of(described_class).to receive(:uri_type).and_return(:raw)
+ allow_any_instance_of(described_class).to receive(:get_uri_types).and_return({ path: :tree })
- doc = filter(link('files/images/logo-black.png'))
+ doc = filter(link(path))
expect(doc.at_css('a')['href'])
.to eq "/#{project_path}/raw/#{ref_escaped}/files/images/logo-black.png"