diff options
author | John Cai <jcai@gitlab.com> | 2019-08-22 15:57:18 -0700 |
---|---|---|
committer | John Cai <jcai@gitlab.com> | 2019-09-12 10:03:15 -0700 |
commit | 890d4ba6e771b3dd174f84af5c144c73dd15a086 (patch) | |
tree | 2cd84d36d99e129f04154489507eff1cda099f8f | |
parent | 1ffa66effbc9272ac9755ba29fab2fd1b3d93db5 (diff) | |
download | gitlab-ce-jc-optimize-uri-type.tar.gz |
Use GetBlobs RPC for uri typejc-optimize-uri-type
Call a modified GetBlobs RPC that provides the tree entry type. Modify
relative_link_filter to get all the tree entry types instead of calling
an RPC per entry.
-rw-r--r-- | GITALY_SERVER_VERSION | 2 | ||||
-rw-r--r-- | Gemfile | 2 | ||||
-rw-r--r-- | Gemfile.lock | 4 | ||||
-rw-r--r-- | changelogs/unreleased/jc-optimize-uri-type.yml | 5 | ||||
-rw-r--r-- | lib/banzai/filter/relative_link_filter.rb | 108 | ||||
-rw-r--r-- | lib/gitlab/gitaly_client/blob_service.rb | 36 | ||||
-rw-r--r-- | spec/lib/banzai/filter/relative_link_filter_spec.rb | 28 |
7 files changed, 166 insertions, 19 deletions
diff --git a/GITALY_SERVER_VERSION b/GITALY_SERVER_VERSION index 91951fd8ad7..76d05362056 100644 --- a/GITALY_SERVER_VERSION +++ b/GITALY_SERVER_VERSION @@ -1 +1 @@ -1.61.0 +1.62.0 @@ -428,7 +428,7 @@ group :ed25519 do end # Gitaly GRPC protocol definitions -gem 'gitaly', '~> 1.58.0' +gem 'gitaly', '~> 1.62.0' gem 'grpc', '~> 1.19.0' diff --git a/Gemfile.lock b/Gemfile.lock index 515e1795671..492f517e829 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -332,7 +332,7 @@ GEM po_to_json (>= 1.0.0) rails (>= 3.2.0) git (1.5.0) - gitaly (1.58.0) + gitaly (1.62.0) grpc (~> 1.0) github-markup (1.7.0) gitlab-labkit (0.5.2) @@ -1126,7 +1126,7 @@ DEPENDENCIES gettext (~> 3.2.2) gettext_i18n_rails (~> 1.8.0) gettext_i18n_rails_js (~> 1.3) - gitaly (~> 1.58.0) + gitaly (~> 1.62.0) github-markup (~> 1.7.0) gitlab-labkit (~> 0.5) gitlab-markup (~> 1.7.0) diff --git a/changelogs/unreleased/jc-optimize-uri-type.yml b/changelogs/unreleased/jc-optimize-uri-type.yml new file mode 100644 index 00000000000..34bd3a0989f --- /dev/null +++ b/changelogs/unreleased/jc-optimize-uri-type.yml @@ -0,0 +1,5 @@ +--- +title: Use GetBlobs RPC for uri type +merge_request: 32167 +author: +type: performance diff --git a/lib/banzai/filter/relative_link_filter.rb b/lib/banzai/filter/relative_link_filter.rb index 2b734db5cfb..d0317aa5ff5 100644 --- a/lib/banzai/filter/relative_link_filter.rb +++ b/lib/banzai/filter/relative_link_filter.rb @@ -20,16 +20,12 @@ module Banzai def call return doc if context[:system_note] - @uri_types = {} clear_memoization(:linkable_files) - doc.search('a:not(.gfm)').each do |el| - process_link_attr el.attribute('href') - end + load_uri_types - doc.css('img, video').each do |el| - process_link_attr el.attribute('src') - process_link_attr el.attribute('data-src') + linkable_attributes.each do |attr| + process_link_attr(attr) end doc @@ -37,12 +33,77 @@ module Banzai protected + def load_uri_types + return unless linkable_files? + return {} unless repository + + clear_memoization(:linkable_attributes) + + @uri_types = request_path.present? ? get_uri_types([request_path]) : {} + + paths = linkable_attributes.flat_map do |attr| + [get_uri(attr).to_s, relative_file_path(get_uri(attr))] + end + + paths.reject!(&:blank?) + paths.uniq! + + @uri_types.merge!(get_uri_types(paths)) + end + def linkable_files? strong_memoize(:linkable_files) do context[:project_wiki].nil? && repository.try(:exists?) && !repository.empty? end end + def linkable_attributes + strong_memoize(:linkable_attributes) do + attrs = [] + + attrs += doc.search('a:not(.gfm)').map do |el| + el.attribute('href') + end + + attrs += doc.search('img, video').flat_map do |el| + [el.attribute('src'), el.attribute('data-src')] + end + + attrs.reject do |attr| + attr.blank? || attr.value.start_with?('//') + end + end + end + + def get_uri_types(paths) + revision_paths = paths.collect do |path| + [current_commit.sha, path.chomp("/")] + end + + uri_types = Gitlab::GitalyClient::BlobService.new(repository).get_blob_types(revision_paths, 1) + + uri_types.each do |name, type| + if type == :blob + blob = ::Blob.decorate(Gitlab::Git::Blob.new(name: name), project) + uri_types[name] = :raw if blob.image? || blob.video? + elsif type.nil? + uri_types[name] = legacy_uri_type(name) + end + end + + uri_types + end + + def get_uri(html_attr) + return if html_attr.blank? + return if html_attr.value.start_with?('//') + + uri = URI(html_attr.value) + + uri if uri.relative? && uri.path.present? + rescue URI::Error, Addressable::URI::InvalidURIError + end + def process_link_attr(html_attr) return if html_attr.blank? return if html_attr.value.start_with?('//') @@ -81,6 +142,7 @@ module Banzai def process_link_to_repository_attr(html_attr) uri = URI(html_attr.value) + if uri.relative? && uri.path.present? html_attr.value = rebuild_relative_uri(uri).to_s end @@ -89,7 +151,7 @@ module Banzai end def rebuild_relative_uri(uri) - file_path = relative_file_path(uri) + file_path = nested_file_path_if_exists(uri) uri.path = [ relative_url_root, @@ -102,13 +164,29 @@ module Banzai uri end - def relative_file_path(uri) - path = Addressable::URI.unescape(uri.path).delete("\0") - request_path = Addressable::URI.unescape(context[:requested_path]) - nested_path = build_relative_path(path, request_path) + def nested_file_path_if_exists(uri) + path = cleaned_file_path(uri) + nested_path = relative_file_path(uri) + file_exists?(nested_path) ? nested_path : path end + def cleaned_file_path(uri) + Addressable::URI.unescape(uri.path).delete("\0").chomp("/") + end + + def relative_file_path(uri) + return if uri.nil? + + build_relative_path(cleaned_file_path(uri), request_path) + end + + def request_path + return unless context[:requested_path] + + Addressable::URI.unescape(context[:requested_path]).chomp("/") + end + # Convert a relative path into its correct location based on the currently # requested path # @@ -152,13 +230,17 @@ module Banzai path.present? && !!uri_type(path) end - def uri_type(path) + def legacy_uri_type(path) # https://gitlab.com/gitlab-org/gitlab-ce/issues/58657 Gitlab::GitalyClient.allow_n_plus_1_calls do @uri_types[path] ||= current_commit.uri_type(path) end end + def uri_type(path) + @uri_types[path] + end + def current_commit @current_commit ||= context[:commit] || repository.commit(ref) end diff --git a/lib/gitlab/gitaly_client/blob_service.rb b/lib/gitlab/gitaly_client/blob_service.rb index 8ccefb00d20..0d6dcb6fda5 100644 --- a/lib/gitlab/gitaly_client/blob_service.rb +++ b/lib/gitlab/gitaly_client/blob_service.rb @@ -76,6 +76,30 @@ module Gitlab GitalyClient::BlobsStitcher.new(response) end + def get_blob_types(revision_paths, limit = -1) + return {} if revision_paths.empty? + + request_revision_paths = revision_paths.map do |rev, path| + Gitaly::GetBlobsRequest::RevisionPath.new(revision: rev, path: encode_binary(path)) + end + + request = Gitaly::GetBlobsRequest.new( + repository: @gitaly_repo, + revision_paths: request_revision_paths, + limit: limit + ) + + response = GitalyClient.call( + @gitaly_repo.storage_name, + :blob_service, + :get_blobs, + request, + timeout: GitalyClient.fast_timeout + ) + + map_blob_types(response) + end + def get_new_lfs_pointers(revision, limit, not_in, dynamic_timeout = nil) request = Gitaly::GetNewLFSPointersRequest.new( repository: @gitaly_repo, @@ -132,6 +156,18 @@ module Gitlab end end end + + def map_blob_types(response) + types = {} + response.each do |msg| + next if msg.type.nil? + next if msg.type == :UNKNOWN + + types[msg.path.dup.force_encoding('utf-8')] = msg.type.downcase + end + + types + end end end end diff --git a/spec/lib/banzai/filter/relative_link_filter_spec.rb b/spec/lib/banzai/filter/relative_link_filter_spec.rb index f8b3748c663..8e55f12ddc5 100644 --- a/spec/lib/banzai/filter/relative_link_filter_spec.rb +++ b/spec/lib/banzai/filter/relative_link_filter_spec.rb @@ -3,6 +3,9 @@ require 'spec_helper' describe Banzai::Filter::RelativeLinkFilter do + include GitHelpers + include RepoHelpers + def filter(doc, contexts = {}) contexts.reverse_merge!({ commit: commit, @@ -34,6 +37,12 @@ describe Banzai::Filter::RelativeLinkFilter do %(<div>#{element}</div>) end + def allow_gitaly_n_plus_1 + Gitlab::GitalyClient.allow_n_plus_1_calls do + yield + end + end + let(:project) { create(:project, :repository, :public) } let(:user) { create(:user) } let(:group) { nil } @@ -44,6 +53,19 @@ describe Banzai::Filter::RelativeLinkFilter do let(:requested_path) { '/' } let(:only_path) { true } + it 'does not trigger a gitaly n+1', :request_store do + raw_doc = "" + + allow_gitaly_n_plus_1 do + 30.times do |i| + create_file_in_repo(project, ref, ref, "new_file_#{i}", "x" ) + raw_doc += link("new_file_#{i}") + end + end + + expect { filter(raw_doc) }.to change { Gitlab::GitalyClient.get_request_count }.by(2) + end + shared_examples :preserve_unchanged do it 'does not modify any relative URL in anchor' do doc = filter(link('README.md')) @@ -244,7 +266,8 @@ describe Banzai::Filter::RelativeLinkFilter do end context 'when ref name contains special chars' do - let(:ref) {'mark#\'@],+;-._/#@!$&()+down'} + let(:ref) { 'mark#\'@],+;-._/#@!$&()+down' } + let(:path) { 'files/images/logo-black.png' } it 'correctly escapes the ref' do # Addressable won't escape the '#', so we do this manually @@ -252,8 +275,9 @@ describe Banzai::Filter::RelativeLinkFilter do # Stub this method so the branch doesn't actually need to be in the repo allow_any_instance_of(described_class).to receive(:uri_type).and_return(:raw) + allow_any_instance_of(described_class).to receive(:get_uri_types).and_return({ path: :tree }) - doc = filter(link('files/images/logo-black.png')) + doc = filter(link(path)) expect(doc.at_css('a')['href']) .to eq "/#{project_path}/raw/#{ref_escaped}/files/images/logo-black.png" |