summaryrefslogtreecommitdiff
path: root/lib/gitlab/sql/pattern.rb
blob: 6563968f315e3c679e1d226ff1b1caa561c67436 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# frozen_string_literal: true

module Gitlab
  module SQL
    module Pattern
      extend ActiveSupport::Concern

      # Minimum length a term needs before `partial_matching?` accepts it
      # (and `to_pattern` therefore wraps it in `%` wildcards).
      MIN_CHARS_FOR_PARTIAL_MATCHING = 3
      # Matches a double-quoted run of one or more non-quote characters, but
      # only when the opening quote sits at the start of the string or after a
      # space, and the closing quote is followed by a space or the end of the
      # string. The surrounding quotes are part of the match.
      REGEX_QUOTED_TERM = /(?<=\A| )"[^"]+"(?= |\z)/.freeze

      class_methods do
        # Builds a `where` scope that matches `query` against any of `columns`.
        #
        # query - The text to search for.
        # columns - The column names / Arel columns to search in.
        # use_minimum_char_limit - Forwarded unchanged to `fuzzy_arel_match`.
        #
        # Columns for which `fuzzy_arel_match` returns nil are skipped; the
        # remaining conditions are OR-ed together. When no condition is left,
        # `where` receives nil.
        def fuzzy_search(query, columns, use_minimum_char_limit: true)
          per_column = columns.map do |column|
            fuzzy_arel_match(column, query, use_minimum_char_limit: use_minimum_char_limit)
          end

          combined = per_column.compact.reduce { |left, right| left.or(right) }

          where(combined)
        end

        # Turns `query` into a LIKE pattern.
        #
        # The query is always escaped via `sanitize_sql_like`. When
        # `partial_matching?` accepts the query, the escaped text is also
        # wrapped in `%` wildcards on both sides; otherwise it is returned as is.
        def to_pattern(query, use_minimum_char_limit: true)
          wildcard = partial_matching?(query, use_minimum_char_limit: use_minimum_char_limit)
          escaped = sanitize_sql_like(query)

          wildcard ? "%#{escaped}%" : escaped
        end

        # Returns the minimum term length used by `partial_matching?`.
        # NOTE(review): exposed as a method rather than read from the constant
        # directly, presumably so it can be overridden — confirm against callers.
        def min_chars_for_partial_matching
          MIN_CHARS_FOR_PARTIAL_MATCHING
        end

        # Tells whether `query` may be matched partially (with wildcards).
        #
        # Always true when `use_minimum_char_limit` is falsy; otherwise the
        # query must be at least `min_chars_for_partial_matching` characters.
        def partial_matching?(query, use_minimum_char_limit: true)
          !use_minimum_char_limit || query.length >= min_chars_for_partial_matching
        end

        # Builds an Arel condition that matches `query` against `column`.
        #
        # column - The column name / Arel column to search in.
        # query - The text to search for.
        # lower_exact_match - When set to `true` we'll fall back to using
        #                     `LOWER(column) = lower-cased query` instead of
        #                     using `ILIKE`.
        # use_minimum_char_limit - Forwarded to `select_fuzzy_terms` and
        #                          `to_pattern` to decide which terms are
        #                          matched with wildcards.
        #
        # Returns nil when `query` is not a String or is blank after squishing.
        def fuzzy_arel_match(column, query, lower_exact_match: false, use_minimum_char_limit: true)
          return unless query.is_a?(String)

          query = query.squish
          return unless query.present?

          arel_column = column.is_a?(Arel::Attributes::Attribute) ? column : arel_table[column]

          words = select_fuzzy_terms(query, use_minimum_char_limit: use_minimum_char_limit)

          if words.any?
            # Every selected term has to match, so the conditions are AND-ed.
            words
              .map { |word| arel_column.matches(to_pattern(word, use_minimum_char_limit: use_minimum_char_limit)) }
              .reduce(:and)
          elsif lower_exact_match
            # No term qualified for partial matching, but we can search for an
            # exact case insensitive match with the query as a whole. The
            # column side is wrapped in LOWER(), so the query is lower-cased
            # too; otherwise a query with upper-case characters never matches.
            Arel::Nodes::NamedFunction
              .new('LOWER', [arel_column])
              .eq(query.downcase)
          else
            arel_column.matches(sanitize_sql_like(query))
          end
        end

        # Splits `query` into search terms and keeps only the ones that
        # `partial_matching?` accepts for the given `use_minimum_char_limit`.
        def select_fuzzy_terms(query, use_minimum_char_limit: true)
          Gitlab::SQL::Pattern
            .split_query_to_search_terms(query)
            .select { |candidate| partial_matching?(candidate, use_minimum_char_limit: use_minimum_char_limit) }
        end
      end

      # Splits `query` into individual search terms.
      #
      # Double-quoted phrases matching REGEX_QUOTED_TERM are kept together as a
      # single term with the surrounding quotes stripped; everything else is
      # split on whitespace. Unquoted terms come first in the result, followed
      # by the quoted ones.
      def self.split_query_to_search_terms(query)
        quoted_terms = query.scan(REGEX_QUOTED_TERM).map { |quoted| quoted[1..-2] }
        unquoted_terms = query.gsub(REGEX_QUOTED_TERM, '').split

        unquoted_terms + quoted_terms
      end
    end
  end
end

# NOTE(review): `prepend_mod` and `prepend_mod_with` are defined outside this
# file; presumably they prepend optional extension modules onto the modules
# defined above — confirm against their definitions.
Gitlab::SQL::Pattern.prepend_mod
Gitlab::SQL::Pattern::ClassMethods.prepend_mod_with('Gitlab::SQL::Pattern::ClassMethods')