summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Brandl <abrandl@gitlab.com>2019-07-08 09:49:43 +0000
committerAndreas Brandl <abrandl@gitlab.com>2019-07-08 09:49:43 +0000
commit5214ad1a7f3720b8ea05f1fb99189d2be48f39ec (patch)
tree76dd9096ef6669886ae3d0c8d6b1a30a9bc91391
parent43420a4b26e5901b853e8ef91463e1bc88af0bcb (diff)
parent4f04c4c90b2db8ddcf5f3e28a9bbefd20c8bbda0 (diff)
downloadgitlab-ce-5214ad1a7f3720b8ea05f1fb99189d2be48f39ec.tar.gz
Merge branch '40379-CJK-search-min-chars' into 'master'
Remove minimum required characters for fuzzy search if no trigram is used See merge request gitlab-org/gitlab-ce!29810
-rw-r--r--app/finders/issuable_finder.rb2
-rw-r--r--app/models/concerns/issuable.rb4
-rw-r--r--changelogs/unreleased/40379-CJK-search-min-chars.yml5
-rw-r--r--lib/gitlab/sql/pattern.rb24
-rw-r--r--spec/lib/gitlab/sql/pattern_spec.rb12
-rw-r--r--spec/models/concerns/issuable_spec.rb10
6 files changed, 44 insertions, 13 deletions
diff --git a/app/finders/issuable_finder.rb b/app/finders/issuable_finder.rb
index 3592505a977..f4fbeacfaba 100644
--- a/app/finders/issuable_finder.rb
+++ b/app/finders/issuable_finder.rb
@@ -429,7 +429,7 @@ class IssuableFinder
items = klass.with(cte.to_arel).from(klass.table_name)
end
- items.full_search(search, matched_columns: params[:in])
+ items.full_search(search, matched_columns: params[:in], use_minimum_char_limit: !use_cte_for_search?)
end
# rubocop: enable CodeReuse/ActiveRecord
diff --git a/app/models/concerns/issuable.rb b/app/models/concerns/issuable.rb
index 299e413321d..952de92cae1 100644
--- a/app/models/concerns/issuable.rb
+++ b/app/models/concerns/issuable.rb
@@ -168,7 +168,7 @@ module Issuable
# matched_columns - Modify the scope of the query. 'title', 'description' or joining them with a comma.
#
# Returns an ActiveRecord::Relation.
- def full_search(query, matched_columns: 'title,description')
+ def full_search(query, matched_columns: 'title,description', use_minimum_char_limit: true)
allowed_columns = [:title, :description]
matched_columns = matched_columns.to_s.split(',').map(&:to_sym)
matched_columns &= allowed_columns
@@ -176,7 +176,7 @@ module Issuable
# Matching title or description if the matched_columns did not contain any allowed columns.
matched_columns = [:title, :description] if matched_columns.empty?
- fuzzy_search(query, matched_columns)
+ fuzzy_search(query, matched_columns, use_minimum_char_limit: use_minimum_char_limit)
end
def simple_sorts
diff --git a/changelogs/unreleased/40379-CJK-search-min-chars.yml b/changelogs/unreleased/40379-CJK-search-min-chars.yml
new file mode 100644
index 00000000000..6f6c4df464f
--- /dev/null
+++ b/changelogs/unreleased/40379-CJK-search-min-chars.yml
@@ -0,0 +1,5 @@
+---
+title: Remove minimum character limits for fuzzy searches when using a CTE
+merge_request: 29810
+author:
+type: fixed
diff --git a/lib/gitlab/sql/pattern.rb b/lib/gitlab/sql/pattern.rb
index fd108b4c124..f6edbfced7f 100644
--- a/lib/gitlab/sql/pattern.rb
+++ b/lib/gitlab/sql/pattern.rb
@@ -9,14 +9,16 @@ module Gitlab
REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/.freeze
class_methods do
- def fuzzy_search(query, columns)
- matches = columns.map { |col| fuzzy_arel_match(col, query) }.compact.reduce(:or)
+ def fuzzy_search(query, columns, use_minimum_char_limit: true)
+ matches = columns.map do |col|
+ fuzzy_arel_match(col, query, use_minimum_char_limit: use_minimum_char_limit)
+ end.compact.reduce(:or)
where(matches)
end
- def to_pattern(query)
- if partial_matching?(query)
+ def to_pattern(query, use_minimum_char_limit: true)
+ if partial_matching?(query, use_minimum_char_limit: use_minimum_char_limit)
"%#{sanitize_sql_like(query)}%"
else
sanitize_sql_like(query)
@@ -27,7 +29,9 @@ module Gitlab
MIN_CHARS_FOR_PARTIAL_MATCHING
end
- def partial_matching?(query)
+ def partial_matching?(query, use_minimum_char_limit: true)
+ return true unless use_minimum_char_limit
+
query.length >= min_chars_for_partial_matching
end
@@ -35,14 +39,14 @@ module Gitlab
# query - The text to search for.
# lower_exact_match - When set to `true` we'll fall back to using
# `LOWER(column) = query` instead of using `ILIKE`.
- def fuzzy_arel_match(column, query, lower_exact_match: false)
+ def fuzzy_arel_match(column, query, lower_exact_match: false, use_minimum_char_limit: true)
query = query.squish
return unless query.present?
- words = select_fuzzy_words(query)
+ words = select_fuzzy_words(query, use_minimum_char_limit: use_minimum_char_limit)
if words.any?
- words.map { |word| arel_table[column].matches(to_pattern(word)) }.reduce(:and)
+ words.map { |word| arel_table[column].matches(to_pattern(word, use_minimum_char_limit: use_minimum_char_limit)) }.reduce(:and)
else
# No words of at least 3 chars, but we can search for an exact
# case insensitive match with the query as a whole
@@ -56,7 +60,7 @@ module Gitlab
end
end
- def select_fuzzy_words(query)
+ def select_fuzzy_words(query, use_minimum_char_limit: true)
quoted_words = query.scan(REGEX_QUOTED_WORD)
query = quoted_words.reduce(query) { |q, quoted_word| q.sub(quoted_word, '') }
@@ -67,7 +71,7 @@ module Gitlab
words.concat(quoted_words)
- words.select { |word| partial_matching?(word) }
+ words.select { |word| partial_matching?(word, use_minimum_char_limit: use_minimum_char_limit) }
end
end
end
diff --git a/spec/lib/gitlab/sql/pattern_spec.rb b/spec/lib/gitlab/sql/pattern_spec.rb
index 5b5052de372..98838712eae 100644
--- a/spec/lib/gitlab/sql/pattern_spec.rb
+++ b/spec/lib/gitlab/sql/pattern_spec.rb
@@ -10,6 +10,12 @@ describe Gitlab::SQL::Pattern do
it 'returns exact matching pattern' do
expect(to_pattern).to eq('12')
end
+
+ context 'and ignore_minimum_char_limit is true' do
+ it 'returns partial matching pattern' do
+ expect(User.to_pattern(query, use_minimum_char_limit: false)).to eq('%12%')
+ end
+ end
end
context 'when a query with a escape character is shorter than 3 chars' do
@@ -18,6 +24,12 @@ describe Gitlab::SQL::Pattern do
it 'returns sanitized exact matching pattern' do
expect(to_pattern).to eq('\_2')
end
+
+ context 'and ignore_minimum_char_limit is true' do
+ it 'returns sanitized partial matching pattern' do
+ expect(User.to_pattern(query, use_minimum_char_limit: false)).to eq('%\_2%')
+ end
+ end
end
context 'when a query is equal to 3 chars' do
diff --git a/spec/models/concerns/issuable_spec.rb b/spec/models/concerns/issuable_spec.rb
index 64f02978d79..68224a56515 100644
--- a/spec/models/concerns/issuable_spec.rb
+++ b/spec/models/concerns/issuable_spec.rb
@@ -223,6 +223,16 @@ describe Issuable do
expect(issuable_class.full_search(searchable_issue2.description.downcase)).to eq([searchable_issue2])
end
+ it 'returns issues with a fuzzy matching description for a query shorter than 3 chars if told to do so' do
+ search = searchable_issue2.description.downcase.scan(/\w+/).sample[-1]
+
+ expect(issuable_class.full_search(search, use_minimum_char_limit: false)).to include(searchable_issue2)
+ end
+
+ it 'returns issues with a fuzzy matching title for a query shorter than 3 chars if told to do so' do
+ expect(issuable_class.full_search('i', use_minimum_char_limit: false)).to include(searchable_issue)
+ end
+
context 'when matching columns is "title"' do
it 'returns issues with a matching title' do
expect(issuable_class.full_search(searchable_issue.title, matched_columns: 'title'))