summaryrefslogtreecommitdiff
path: root/lib/gitlab/search
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/search')
-rw-r--r--lib/gitlab/search/abuse_detection.rb81
-rw-r--r--lib/gitlab/search/abuse_validators/no_abusive_coercion_from_string_validator.rb15
-rw-r--r--lib/gitlab/search/abuse_validators/no_abusive_term_length_validator.rb36
-rw-r--r--lib/gitlab/search/params.rb89
-rw-r--r--lib/gitlab/search/query.rb2
5 files changed, 222 insertions, 1 deletions
diff --git a/lib/gitlab/search/abuse_detection.rb b/lib/gitlab/search/abuse_detection.rb
new file mode 100644
index 00000000000..7b5377bce88
--- /dev/null
+++ b/lib/gitlab/search/abuse_detection.rb
@@ -0,0 +1,81 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Search
+ # Validates a fixed set of search parameters (READABLE_PARAMS) using
+ # ActiveModel validations. Every rule below adds an "abusive ..." message
+ # to `errors` for the offending attribute when it fails.
+ class AbuseDetection
+ include ActiveModel::Validations
+ include AbuseValidators
+
+ # Term-length threshold handed to the no_abusive_term_length validation
+ # as :maximum; :maximum_for_url is set to twice this value.
+ ABUSIVE_TERM_SIZE = 100
+ # Whole-string pattern (\A..\z) applied to repository_ref and project_ref:
+ # alphanumerics plus the characters _ - / . ! only.
+ ALLOWED_CHARS_REGEX = %r{\A[[:alnum:]_\-\/\.!]+\z}.freeze
+ # Minimum query_string length enforced by the "tiny search" validation.
+ MINIMUM_SEARCH_CHARS = 2
+
+ # Values accepted by the :scope inclusion validation.
+ ALLOWED_SCOPES = %w(
+ blobs
+ code
+ commits
+ epics
+ issues
+ merge_requests
+ milestones
+ notes
+ projects
+ snippet_titles
+ users
+ wiki_blobs
+ ).freeze
+
+ # Parameters copied from the input in #initialize and exposed through
+ # attr_reader; these are the only attributes the validations look at.
+ READABLE_PARAMS = %i(
+ group_id
+ project_id
+ project_ref
+ query_string
+ repository_ref
+ scope
+ ).freeze
+
+ # A query_string that is exactly one of these words fails the exclusion
+ # validation below.
+ STOP_WORDS = %w(
+ a an and are as at be but by for if in into is it no not of on or such that the their then there these they this to was will with
+ ).freeze
+
+ # IDs may be blank; otherwise they must be integers.
+ validates :project_id, :group_id,
+ numericality: { only_integer: true, message: "abusive ID detected" }, allow_blank: true
+
+ validates :scope, inclusion: { in: ALLOWED_SCOPES, message: 'abusive scope detected' }, allow_blank: true
+
+ validates :repository_ref, :project_ref,
+ format: { with: ALLOWED_CHARS_REGEX, message: "abusive characters detected" }, allow_blank: true
+
+ validates :query_string,
+ exclusion: { in: STOP_WORDS, message: 'stopword only abusive search detected' }, allow_blank: true
+
+ # Skipped for '*' and for stop-word queries (see skip_tiny_search_validation?),
+ # so those queries are not reported as tiny searches by this rule.
+ validates :query_string,
+ length: { minimum: MINIMUM_SEARCH_CHARS, message: 'abusive tiny search detected' }, unless: :skip_tiny_search_validation?, allow_blank: true
+
+ validates :query_string,
+ no_abusive_term_length: { maximum: ABUSIVE_TERM_SIZE, maximum_for_url: ABUSIVE_TERM_SIZE * 2 }
+
+ validates :query_string, :repository_ref, :project_ref, no_abusive_coercion_from_string: true
+
+ attr_reader(*READABLE_PARAMS)
+
+ # @param params [#[]] source of values; read once per READABLE_PARAMS key
+ # (symbol keys), each stored in the instance variable of the same name.
+ def initialize(params)
+ READABLE_PARAMS.each { |p| instance_variable_set("@#{p}", params[p]) }
+ end
+
+ private
+
+ # True when the minimum-length rule should not run for this query.
+ def skip_tiny_search_validation?
+ wildcard_search? || stop_word_search?
+ end
+
+ # True when the whole query is the single character '*'.
+ def wildcard_search?
+ query_string == '*'
+ end
+
+ # True when the whole query is exactly one entry of STOP_WORDS.
+ def stop_word_search?
+ STOP_WORDS.include? query_string
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/search/abuse_validators/no_abusive_coercion_from_string_validator.rb b/lib/gitlab/search/abuse_validators/no_abusive_coercion_from_string_validator.rb
new file mode 100644
index 00000000000..06464980afd
--- /dev/null
+++ b/lib/gitlab/search/abuse_validators/no_abusive_coercion_from_string_validator.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Search
+ module AbuseValidators
+ # Per-attribute validator that adds an error when the value is present
+ # but is not a String. Blank values (nil, empty) are accepted.
+ # NOTE(review): presumably guards against non-string request parameters
+ # such as arrays or hashes - confirm against callers.
+ class NoAbusiveCoercionFromStringValidator < ActiveModel::EachValidator
+ # @param instance [Object] object being validated; receives the error
+ # @param attribute [Symbol] name of the attribute under validation
+ # @param value [Object] current value of that attribute
+ def validate_each(instance, attribute, value)
+ if value.present? && !value.is_a?(String)
+ instance.errors.add attribute, "abusive coercion from string detected"
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/search/abuse_validators/no_abusive_term_length_validator.rb b/lib/gitlab/search/abuse_validators/no_abusive_term_length_validator.rb
new file mode 100644
index 00000000000..8a94520d8fd
--- /dev/null
+++ b/lib/gitlab/search/abuse_validators/no_abusive_term_length_validator.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Search
+ module AbuseValidators
+ # Per-attribute validator that adds an error when any
+ # whitespace-separated term of a String value reaches the configured
+ # length limit.
+ #
+ # Options:
+ # :maximum - required; limit applied to ordinary terms
+ # :maximum_for_url - optional; limit applied to terms that look like
+ # an absolute URI (defaults to :maximum)
+ class NoAbusiveTermLengthValidator < ActiveModel::EachValidator
+ # Non-String values are ignored here and produce no error.
+ def validate_each(instance, attribute, value)
+ return unless value.is_a?(String)
+
+ # String#split without arguments splits on whitespace.
+ if value.split.any? { |term| term_too_long?(term) }
+ instance.errors.add attribute, 'abusive term length detected'
+ end
+ end
+
+ private
+
+ # The comparison is inclusive: a term whose length equals the limit is
+ # already treated as too long.
+ def term_too_long?(term)
+ char_limit = url_detected?(term) ? maximum_for_url : maximum
+ term.length >= char_limit
+ end
+
+ # True when the term matches the URI parser's :ABS_URI pattern.
+ def url_detected?(uri_str)
+ URI::DEFAULT_PARSER.regexp[:ABS_URI].match? uri_str
+ end
+
+ # Falls back to :maximum when :maximum_for_url was not configured.
+ def maximum_for_url
+ options.fetch(:maximum_for_url, maximum)
+ end
+
+ # Raises KeyError when the :maximum option is missing.
+ def maximum
+ options.fetch(:maximum)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/search/params.rb b/lib/gitlab/search/params.rb
new file mode 100644
index 00000000000..e6a1305a82a
--- /dev/null
+++ b/lib/gitlab/search/params.rb
@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Search
+ # Wraps raw search parameters, normalises the query string and validates
+ # it on construction. When abuse detection is enabled, an AbuseDetection
+ # object is built from this instance and validated alongside it.
+ class Params
+ include ActiveModel::Validations
+
+ # Maximum number of characters allowed in query_string.
+ SEARCH_CHAR_LIMIT = 4096
+ # Maximum number of counted terms allowed (see not_too_many_terms).
+ SEARCH_TERM_LIMIT = 64
+
+ # Generic validation
+ validates :query_string, length: { maximum: SEARCH_CHAR_LIMIT }
+ validate :not_too_many_terms
+
+ attr_reader :raw_params, :query_string, :abuse_detection
+ alias_method :search, :query_string
+ alias_method :term, :query_string
+
+ # @param params [Hash, Object] a Hash is converted to indifferent access;
+ # anything else is duplicated with #dup and must respond to #[]
+ # @param detect_abuse [Boolean] build and run AbuseDetection when true
+ #
+ # The query string is taken from :search, falling back to :term, then
+ # converted with #to_s and stripped. Validation runs immediately, so
+ # errors are populated as soon as the object exists.
+ def initialize(params, detect_abuse: true)
+ @raw_params = params.is_a?(Hash) ? params.with_indifferent_access : params.dup
+ @query_string = strip_surrounding_whitespace(@raw_params[:search] || @raw_params[:term])
+ @detect_abuse = detect_abuse
+ @abuse_detection = AbuseDetection.new(self) if @detect_abuse
+
+ validate
+ end
+
+ # Reads a parameter by key. A key naming a method this object responds
+ # to is dispatched to that method; all other keys are looked up in
+ # raw_params.
+ # NOTE(review): respond_to? is true for every public method, not only the
+ # readers defined in this class - confirm callers never pass such keys.
+ def [](key)
+ if respond_to? key
+ # We have this logic here to support reading custom attributes
+ # like @query_string
+ #
+ # This takes precedence over values in @raw_params
+ public_send(key) # rubocop:disable GitlabSecurity/PublicSend
+ else
+ raw_params[key]
+ end
+ end
+
+ # True only when abuse detection is enabled and it recorded errors.
+ def abusive?
+ detect_abuse? && abuse_detection.errors.any?
+ end
+
+ # False when a query_string error message contains the character limit
+ # as text. This relies on the message wording.
+ # NOTE(review): presumably the length validation's message includes the
+ # limit - confirm.
+ def valid_query_length?
+ return true unless errors.has_key? :query_string
+
+ errors[:query_string].none? { |msg| msg.include? SEARCH_CHAR_LIMIT.to_s }
+ end
+
+ # False when a query_string error message contains the term limit as
+ # text, as the message added by not_too_many_terms does.
+ def valid_terms_count?
+ return true unless errors.has_key? :query_string
+
+ errors[:query_string].none? { |msg| msg.include? SEARCH_TERM_LIMIT.to_s }
+ end
+
+ # Runs abuse detection validation first (when enabled), then this
+ # object's own validations via super.
+ def validate
+ if detect_abuse?
+ abuse_detection.validate
+ end
+
+ super
+ end
+
+ # With abuse detection enabled, && short-circuits: super is not called
+ # when abuse_detection.valid? is false.
+ def valid?
+ if detect_abuse?
+ abuse_detection.valid? && super
+ else
+ super
+ end
+ end
+
+ private
+
+ def detect_abuse?
+ @detect_abuse
+ end
+
+ # Counts whitespace-separated words of at least 3 characters; shorter
+ # words do not count towards SEARCH_TERM_LIMIT.
+ def not_too_many_terms
+ if query_string.split.count { |word| word.length >= 3 } > SEARCH_TERM_LIMIT
+ errors.add :query_string, "has too many search terms (maximum is #{SEARCH_TERM_LIMIT})"
+ end
+ end
+
+ # Converts with #to_s before stripping, so nil becomes an empty string.
+ def strip_surrounding_whitespace(obj)
+ obj.to_s.strip
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/search/query.rb b/lib/gitlab/search/query.rb
index c0420126ada..97ee7c7817d 100644
--- a/lib/gitlab/search/query.rb
+++ b/lib/gitlab/search/query.rb
@@ -44,7 +44,7 @@ module Gitlab
next unless match
- input = match.split(':')[1..-1].join
+ input = match.split(':')[1..].join
next if input.empty?
filter[:negated] = match.start_with?("-")