From 7357209f91ae4c0b504f47e36220bd04a0e2feca Mon Sep 17 00:00:00 2001 From: Mario de la Ossa Date: Wed, 6 Jun 2018 18:14:10 -0600 Subject: Implement filtering by filename on code search --- changelogs/unreleased/ce-5024-filename-search.yml | 5 +++ doc/api/search.md | 9 ++++ lib/gitlab/file_finder.rb | 17 ++++--- lib/gitlab/search/parsed_query.rb | 23 ++++++++++ lib/gitlab/search/query.rb | 55 +++++++++++++++++++++++ spec/lib/gitlab/file_finder_spec.rb | 20 ++++++++- spec/lib/gitlab/search/query_spec.rb | 39 ++++++++++++++++ spec/requests/api/search_spec.rb | 24 ++++++++++ 8 files changed, 186 insertions(+), 6 deletions(-) create mode 100644 changelogs/unreleased/ce-5024-filename-search.yml create mode 100644 lib/gitlab/search/parsed_query.rb create mode 100644 lib/gitlab/search/query.rb create mode 100644 spec/lib/gitlab/search/query_spec.rb diff --git a/changelogs/unreleased/ce-5024-filename-search.yml b/changelogs/unreleased/ce-5024-filename-search.yml new file mode 100644 index 00000000000..a8bf9b1f802 --- /dev/null +++ b/changelogs/unreleased/ce-5024-filename-search.yml @@ -0,0 +1,5 @@ +--- +title: Add filename filtering to code search +merge_request: 19509 +author: +type: added diff --git a/doc/api/search.md b/doc/api/search.md index 107ddaffa6a..9716f682ace 100644 --- a/doc/api/search.md +++ b/doc/api/search.md @@ -776,6 +776,15 @@ Example response: ### Scope: blobs +Filters are available for this scope: +- filename +- path +- extension + +to use a filter simply include it in your query like so: `a query filename:some_name*`. + +You may use wildcards (`*`) to use glob matching. + ```bash curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/projects/6/search?scope=blobs&search=installation ``` diff --git a/lib/gitlab/file_finder.rb b/lib/gitlab/file_finder.rb index f42088f980e..af8270c8db8 100644 --- a/lib/gitlab/file_finder.rb +++ b/lib/gitlab/file_finder.rb @@ -14,14 +14,21 @@ module Gitlab end def find(query) - by_content = find_by_content(query) + query = Gitlab::Search::Query.new(query) do + filter :filename, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}$/i } + filter :path, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}/i } + filter :extension, matcher: ->(filter, blob) { blob.filename =~ /\.#{filter[:regex_value]}$/i } + end + + by_content = find_by_content(query.term) already_found = Set.new(by_content.map(&:filename)) - by_filename = find_by_filename(query, except: already_found) + by_filename = find_by_filename(query.term, except: already_found) + + files = (by_content + by_filename) + .sort_by(&:filename) - (by_content + by_filename) - .sort_by(&:filename) - .map { |blob| [blob.filename, blob] } + query.filter_results(files).map { |blob| [blob.filename, blob] } end private diff --git a/lib/gitlab/search/parsed_query.rb b/lib/gitlab/search/parsed_query.rb new file mode 100644 index 00000000000..23595f23f01 --- /dev/null +++ b/lib/gitlab/search/parsed_query.rb @@ -0,0 +1,23 @@ +module Gitlab + module Search + class ParsedQuery + attr_reader :term, :filters + + def initialize(term, filters) + @term = term + @filters = filters + end + + def filter_results(results) + filters = @filters.reject { |filter| filter[:matcher].nil? } + return unless filters + + results.select do |result| + filters.all? do |filter| + filter[:matcher].call(filter, result) + end + end + end + end + end +end diff --git a/lib/gitlab/search/query.rb b/lib/gitlab/search/query.rb new file mode 100644 index 00000000000..8583bce7792 --- /dev/null +++ b/lib/gitlab/search/query.rb @@ -0,0 +1,55 @@ +module Gitlab + module Search + class Query < SimpleDelegator + def initialize(query, filter_opts = {}, &block) + @raw_query = query.dup + @filters = [] + @filter_options = { default_parser: :downcase.to_proc }.merge(filter_opts) + + self.instance_eval(&block) if block_given? + + @query = Gitlab::Search::ParsedQuery.new(*extract_filters) + # set the ParsedQuery as our default delegator thanks to SimpleDelegator + super(@query) + end + + private + + def filter(name, **attributes) + filter = { parser: @filter_options[:default_parser], name: name }.merge(attributes) + + @filters << filter + end + + def filter_options(**options) + @filter_options.merge!(options) + end + + def extract_filters + fragments = [] + + filters = @filters.each_with_object([]) do |filter, parsed_filters| + match = @raw_query.split.find { |part| part =~ /\A#{filter[:name]}:/ } + next unless match + + input = match.split(':')[1..-1].join + next if input.empty? + + filter[:value] = parse_filter(filter, input) + filter[:regex_value] = Regexp.escape(filter[:value]).gsub('\*', '.*?') + fragments << match + + parsed_filters << filter + end + + query = (@raw_query.split - fragments).join(' ') + + [query, filters] + end + + def parse_filter(filter, input) + filter[:parser].call(input) + end + end + end +end diff --git a/spec/lib/gitlab/file_finder_spec.rb b/spec/lib/gitlab/file_finder_spec.rb index d6d9e4001a3..b49c5817131 100644 --- a/spec/lib/gitlab/file_finder_spec.rb +++ b/spec/lib/gitlab/file_finder_spec.rb @@ -3,11 +3,29 @@ require 'spec_helper' describe Gitlab::FileFinder do describe '#find' do let(:project) { create(:project, :public, :repository) } + subject { described_class.new(project, project.default_branch) } it_behaves_like 'file finder' do - subject { described_class.new(project, project.default_branch) } let(:expected_file_by_name) { 'files/images/wm.svg' } let(:expected_file_by_content) { 'CHANGELOG' } end + + it 'filters by name' do + results = subject.find('files filename:wm.svg') + + expect(results.count).to eq(1) + end + + it 'filters by path' do + results = subject.find('white path:images') + + expect(results.count).to eq(1) + end + + it 'filters by extension' do + results = subject.find('files extension:svg') + + expect(results.count).to eq(1) + end end end diff --git a/spec/lib/gitlab/search/query_spec.rb b/spec/lib/gitlab/search/query_spec.rb new file mode 100644 index 00000000000..2d00428fffa --- /dev/null +++ b/spec/lib/gitlab/search/query_spec.rb @@ -0,0 +1,39 @@ +require 'spec_helper' + +describe Gitlab::Search::Query do + let(:query) { 'base filter:wow anotherfilter:noway name:maybe other:mmm leftover' } + let(:subject) do + described_class.new(query) do + filter :filter + filter :name, parser: :upcase.to_proc + filter :other + end + end + + it { expect(described_class).to be < SimpleDelegator } + + it 'leaves undefined filters in the main query' do + expect(subject.term).to eq('base anotherfilter:noway leftover') + end + + it 'parses filters' do + expect(subject.filters.count).to eq(3) + expect(subject.filters.map { |f| f[:value] }).to match_array(%w[wow MAYBE mmm]) + end + + context 'with an empty filter' do + let(:query) { 'some bar name: baz' } + + it 'ignores empty filters' do + expect(subject.term).to eq('some bar name: baz') + end + end + + context 'with a pipe' do + let(:query) { 'base | nofilter' } + + it 'does not escape the pipe' do + expect(subject.term).to eq(query) + end + end +end diff --git a/spec/requests/api/search_spec.rb b/spec/requests/api/search_spec.rb index aca4aa40027..f8e468be170 100644 --- a/spec/requests/api/search_spec.rb +++ b/spec/requests/api/search_spec.rb @@ -312,6 +312,30 @@ describe API::Search do end it_behaves_like 'response is correct', schema: 'public_api/v4/blobs', size: 2 + + context 'filters' do + it 'by filename' do + get api("/projects/#{repo_project.id}/search", user), scope: 'blobs', search: 'mon filename:PROCESS.md' + + expect(response).to have_gitlab_http_status(200) + expect(json_response.size).to eq(2) + expect(json_response.first['filename']).to eq('PROCESS.md') + end + + it 'by path' do + get api("/projects/#{repo_project.id}/search", user), scope: 'blobs', search: 'mon path:markdown' + + expect(response).to have_gitlab_http_status(200) + expect(json_response.size).to eq(8) + end + + it 'by extension' do + get api("/projects/#{repo_project.id}/search", user), scope: 'blobs', search: 'mon extension:md' + + expect(response).to have_gitlab_http_status(200) + expect(json_response.size).to eq(11) + end + end end end end -- cgit v1.2.1