summaryrefslogtreecommitdiff
path: root/lib/gitlab/sql/pattern.rb
blob: 5f0c98cb5a44475809379af895c684908aa3aec1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
module Gitlab
  module SQL
    module Pattern
      extend ActiveSupport::Concern

      # Minimum number of characters a search term needs before it is
      # considered for partial matching (see `partial_matching?`).
      MIN_CHARS_FOR_PARTIAL_MATCHING = 3
      # Matches a double-quoted phrase, quotes included. The opening quote
      # must sit at the start of the string or right after a space, and the
      # closing quote at the end of the string or right before a space. The
      # surrounding spaces are checked with lookarounds and are not part of
      # the match.
      REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/

      class_methods do
        # Restricts the current scope to rows where at least one of the given
        # columns fuzzily matches `query`.
        #
        # query   - The search string.
        # columns - The column names to search in.
        #
        # Each column's condition comes from `fuzzy_arel_match`. Columns that
        # yield no condition (nil) are skipped and the remaining conditions
        # are OR-ed together. When no column yields a condition, `where` is
        # called with nil.
        #
        # Returns whatever `where` returns.
        def fuzzy_search(query, columns)
          conditions = columns.map { |column| fuzzy_arel_match(column, query) }
          combined = conditions.compact.inject { |lhs, rhs| lhs.or(rhs) }

          where(combined)
        end

        # Converts a search term into a pattern for a SQL LIKE comparison.
        #
        # The term is always escaped with `sanitize_sql_like`. Terms that
        # qualify for partial matching (see `partial_matching?`) are wrapped
        # in `%` wildcards; other terms are returned escaped but unwrapped.
        #
        # query - The search term String.
        #
        # Returns the pattern String.
        def to_pattern(query)
          escaped = sanitize_sql_like(query)

          partial_matching?(query) ? "%#{escaped}%" : escaped
        end

        # Whether `query` is long enough for partial matching, i.e. has at
        # least MIN_CHARS_FOR_PARTIAL_MATCHING characters.
        #
        # query - The search term String.
        #
        # Returns true or false.
        def partial_matching?(query)
          query.size >= MIN_CHARS_FOR_PARTIAL_MATCHING
        end

        # Builds the match condition for a single column.
        #
        # column - The column name to look up in `arel_table`.
        # query  - The search String; it is squished before use.
        #
        # Returns nil when the squished query is blank. Otherwise returns the
        # result of calling `matches` on the column: one condition per fuzzy
        # word (see `select_fuzzy_words`) combined with `and`, or a single
        # condition on the whole query when there are no fuzzy words.
        def fuzzy_arel_match(column, query)
          query = query.squish
          return if query.blank?

          terms = select_fuzzy_words(query)
          attribute = arel_table[column]

          # No words of at least 3 chars, but we can search for an exact
          # case insensitive match with the query as a whole
          return attribute.matches(sanitize_sql_like(query)) if terms.empty?

          terms.map { |term| attribute.matches(to_pattern(term)) }.reduce(:and)
        end

        # Splits a search query into the terms used for fuzzy matching.
        #
        # Double-quoted phrases (as matched by REGEX_QUOTED_WORD) are kept
        # together as one term with the surrounding quotes stripped; the rest
        # of the query is split on whitespace. Unquoted terms come first in
        # the result, followed by the quoted phrases. Terms that do not
        # satisfy `partial_matching?` are dropped.
        #
        # query - The search String.
        #
        # Returns an Array of Strings.
        def select_fuzzy_words(query)
          quoted_words = []

          # Remove the quoted phrases with the same regex that detects them, so
          # exactly the matched spans are cut out. Removing the first literal
          # occurrence of each phrase instead could hit an earlier, unmatched
          # copy (e.g. the `"foo"` inside `a"foo" "foo"`) and mangle that word.
          unquoted = query.gsub(REGEX_QUOTED_WORD) do |quoted_word|
            quoted_words << quoted_word[1..-2]
            ''
          end

          (unquoted.split + quoted_words).select { |word| partial_matching?(word) }
        end
      end
    end
  end
end