summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2022-07-01 03:08:37 +0000
committerGitLab Bot <gitlab-bot@gitlab.com>2022-07-01 03:08:37 +0000
commitd9a9116e0e78fb7f2690e88878a4b6384d80f763 (patch)
treebb974645b3b95af9e1248fcd2086c3dcd4a99887
parent516b939c44ec77bb773f08df15079c80fb4d10d2 (diff)
downloadgitlab-ce-d9a9116e0e78fb7f2690e88878a4b6384d80f763.tar.gz
Add latest changes from gitlab-org/gitlab@master
-rw-r--r--lib/gitlab/pagination/keyset/order.rb4
-rw-r--r--lib/unnested_in_filters/dsl.rb87
-rw-r--r--lib/unnested_in_filters/rewriter.rb185
-rw-r--r--spec/lib/gitlab/pagination/keyset/order_spec.rb24
-rw-r--r--spec/lib/unnested_in_filters/dsl_spec.rb31
-rw-r--r--spec/lib/unnested_in_filters/rewriter_spec.rb142
6 files changed, 473 insertions, 0 deletions
diff --git a/lib/gitlab/pagination/keyset/order.rb b/lib/gitlab/pagination/keyset/order.rb
index 290e94401b8..eecf74b8d18 100644
--- a/lib/gitlab/pagination/keyset/order.rb
+++ b/lib/gitlab/pagination/keyset/order.rb
@@ -107,6 +107,10 @@ module Gitlab
end
end
+ def attribute_names
+ column_definitions.map(&:attribute_name)
+ end
+
# This methods builds the conditions for the keyset pagination
#
# Example:
diff --git a/lib/unnested_in_filters/dsl.rb b/lib/unnested_in_filters/dsl.rb
new file mode 100644
index 00000000000..f5f358c729e
--- /dev/null
+++ b/lib/unnested_in_filters/dsl.rb
@@ -0,0 +1,87 @@
+# frozen_string_literal: true
+
+# Including the `UnnestedInFilters::Dsl` module to an ActiveRecord
+# model extends the interface of the following class instances to be
+# able to use the `use_unnested_filters` method;
+#
+# - Model relation;
+# `Model.where(...).use_unnested_filters`
+# - All the association proxies
+# `project.model_association.use_unnested_filters`
+# - All the relation instances of the association
+# `project.model_association.where(...).use_unnested_filters
+#
+# Note: The interface of the model itself won't be extended as we don't
+# have a use-case for now(`Model.use_unnested_filters` won't work).
+#
+# Example usage of the API;
+#
+# relation = Vulnerabilities::Read.where(state: [1, 4])
+# .use_unnested_filters
+# .order(severity: :desc, vulnerability_id: :desc)
+#
+# relation.to_a # => Will load records by using the optimized query
+#
+# See `UnnestedInFilters::Rewriter` for the details about the optimizations applied.
+#
+# rubocop:disable Gitlab/ModuleWithInstanceVariables
+module UnnestedInFilters
+ module Dsl
+ extend ActiveSupport::Concern
+
+ MODULES_TO_EXTEND = [
+ ActiveRecord::Relation,
+ ActiveRecord::Associations::CollectionProxy,
+ ActiveRecord::AssociationRelation
+ ].freeze
+
+ included do
+ MODULES_TO_EXTEND.each do |mod|
+ delegate_mod = relation_delegate_class(mod)
+ delegate_mod.prepend(UnnestedInFilters::Dsl::Relation)
+ end
+ end
+
+ module Relation
+ def use_unnested_filters
+ spawn.use_unnested_filters!
+ end
+
+ def use_unnested_filters!
+ assert_mutability!
+ @values[:unnested_filters] = true
+
+ self
+ end
+
+ def use_unnested_filters?
+ @values.fetch(:unnested_filters, false)
+ end
+
+ def load(*)
+ return super if loaded? || !rewrite_query?
+
+ @records = unnested_filter_rewriter.rewrite.to_a
+ @loaded = true
+
+ self
+ end
+
+ def exists?(*)
+ return super unless rewrite_query?
+
+ unnested_filter_rewriter.rewrite.exists?
+ end
+
+ private
+
+ def rewrite_query?
+ use_unnested_filters? && unnested_filter_rewriter.rewrite?
+ end
+
+ def unnested_filter_rewriter
+ @unnested_filter_rewriter ||= UnnestedInFilters::Rewriter.new(self)
+ end
+ end
+ end
+end
diff --git a/lib/unnested_in_filters/rewriter.rb b/lib/unnested_in_filters/rewriter.rb
new file mode 100644
index 00000000000..c953b673c0e
--- /dev/null
+++ b/lib/unnested_in_filters/rewriter.rb
@@ -0,0 +1,185 @@
+# frozen_string_literal: true
+
+# rubocop:disable CodeReuse/ActiveRecord (This module is generating ActiveRecord relations therefore using AR methods is necessary)
+module UnnestedInFilters
+ class Rewriter
+ include Gitlab::Utils::StrongMemoize
+
+ class ValueTable
+ def initialize(model, attribute, values)
+ @model = model
+ @attribute = attribute.to_s
+ @values = values
+ end
+
+ def to_sql
+ "unnest(#{serialized_values}::#{sql_type}[]) AS #{table_name}(#{column_name})"
+ end
+
+ def as_predicate
+ "#{model.table_name}.#{column_name} = #{table_name}.#{column_name}"
+ end
+
+ private
+
+ attr_reader :model, :attribute, :values
+
+ delegate :connection, :columns, :attribute_types, to: :model, private: true
+ delegate :quote, :quote_table_name, :quote_column_name, to: :connection
+
+ def table_name
+ quote_table_name(attribute.pluralize)
+ end
+
+ def column_name
+ quote_column_name(attribute)
+ end
+
+ def serialized_values
+ array_type.serialize(values)
+ .then { |array| quote(array) }
+ end
+
+ def array_type
+ ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Array.new(attribute_types[attribute])
+ end
+
+ def sql_type
+ column.sql_type_metadata.sql_type
+ end
+
+ def column
+ columns.find { _1.name == attribute }
+ end
+ end
+
+ def initialize(relation)
+ @relation = relation
+ end
+
+ # Rewrites the given ActiveRecord::Relation object to
+ # utilize the DB indices efficiently.
+ #
+ # Example usage;
+ #
+ # relation = Vulnerabilities::Read.where(state: [1, 4])
+ # relation = relation.order(severity: :desc, vulnerability_id: :desc)
+ #
+ # rewriter = UnnestedInFilters::Rewriter.new(relation)
+ # optimized_relation = rewriter.rewrite
+ #
+ # In the above example. the `relation` object would produce the following SQL query;
+ #
+ # SELECT
+ # "vulnerability_reads".*
+ # FROM
+ # "vulnerability_reads"
+ # WHERE
+ # "vulnerability_reads"."state" IN (1, 4)
+ # ORDER BY
+ # "vulnerability_reads"."severity" DESC,
+ # "vulnerability_reads"."vulnerability_id" DESC
+ # LIMIT 20;
+ #
+ # And the `optimized_relation` object would would produce the following query to
+ # utilize the index on (state, severity, vulnerability_id);
+ #
+ # SELECT
+ # "vulnerability_reads".*
+ # FROM
+ # unnest('{1, 4}'::smallint[]) AS "states" ("state"),
+ # LATERAL (
+ # SELECT
+ # "vulnerability_reads".*
+ # FROM
+ # "vulnerability_reads"
+ # WHERE
+ # (vulnerability_reads."state" = "states"."state")
+ # ORDER BY
+ # "vulnerability_reads"."severity" DESC,
+ # "vulnerability_reads"."vulnerability_id" DESC
+ # LIMIT 20) AS vulnerability_reads
+ # ORDER BY
+ # "vulnerability_reads"."severity" DESC,
+ # "vulnerability_reads"."vulnerability_id" DESC
+ # LIMIT 20
+ #
+ def rewrite
+ model.from(from)
+ .limit(limit_value)
+ .order(order_values)
+ .includes(relation.includes_values)
+ .preload(relation.preload_values)
+ .eager_load(relation.eager_load_values)
+ end
+
+ def rewrite?
+ strong_memoize(:rewrite) do
+ in_filters.present? && has_index_coverage?
+ end
+ end
+
+ private
+
+ attr_reader :relation
+
+ delegate :model, :order_values, :limit_value, :where_values_hash, to: :relation, private: true
+
+ def from
+ [value_tables.map(&:to_sql) + [lateral]].join(', ')
+ end
+
+ def lateral
+ "LATERAL (#{join_relation.to_sql}) AS #{model.table_name}"
+ end
+
+ def join_relation
+ value_tables.reduce(unscoped_relation) do |memo, tmp_table|
+ memo.where(tmp_table.as_predicate)
+ end
+ end
+
+ def unscoped_relation
+ relation.unscope(where: in_filters.keys)
+ end
+
+ def in_filters
+ @in_filters ||= where_values_hash.select { _2.is_a?(Array) }
+ end
+
+ def has_index_coverage?
+ indices.any? do |index|
+ (filter_attributes - Array(index.columns)).empty? && # all the filter attributes are indexed
+ index.columns.last(order_attributes.length) == order_attributes && # index can be used in sorting
+ (index.columns - (filter_attributes + order_attributes)).empty? # there is no other columns in the index
+ end
+ end
+
+ def filter_attributes
+ @filter_attributes ||= where_values_hash.keys
+ end
+
+ def order_attributes
+ @order_attributes ||= order_values.flat_map(&method(:extract_column_name))
+ end
+
+ def extract_column_name(order_value)
+ case order_value
+ when Arel::Nodes::Ordering
+ order_value.expr.name
+ when ::Gitlab::Pagination::Keyset::Order
+ order_value.attribute_names
+ end
+ end
+
+ def indices
+ model.connection.schema_cache.indexes(model.table_name)
+ end
+
+ def value_tables
+ @value_tables ||= in_filters.map do |attribute, values|
+ ValueTable.new(model, attribute, values)
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/pagination/keyset/order_spec.rb b/spec/lib/gitlab/pagination/keyset/order_spec.rb
index abbb3a21cd4..c1fc73603d6 100644
--- a/spec/lib/gitlab/pagination/keyset/order_spec.rb
+++ b/spec/lib/gitlab/pagination/keyset/order_spec.rb
@@ -680,4 +680,28 @@ RSpec.describe Gitlab::Pagination::Keyset::Order do
end
end
end
+
+ describe '#attribute_names' do
+ let(:expected_attribute_names) { %w(id name) }
+ let(:order) do
+ Gitlab::Pagination::Keyset::Order.build([
+ Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
+ attribute_name: 'id',
+ order_expression: Project.arel_table['id'].desc,
+ nullable: :not_nullable,
+ distinct: true
+ ),
+ Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
+ attribute_name: 'name',
+ order_expression: Project.arel_table['name'].desc,
+ nullable: :not_nullable,
+ distinct: true
+ )
+ ])
+ end
+
+ subject { order.attribute_names }
+
+ it { is_expected.to match_array(expected_attribute_names) }
+ end
end
diff --git a/spec/lib/unnested_in_filters/dsl_spec.rb b/spec/lib/unnested_in_filters/dsl_spec.rb
new file mode 100644
index 00000000000..bce4c88f94c
--- /dev/null
+++ b/spec/lib/unnested_in_filters/dsl_spec.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe UnnestedInFilters::Dsl do
+ let(:test_model) do
+ Class.new(ApplicationRecord) do
+ include UnnestedInFilters::Dsl
+
+ self.table_name = 'users'
+ end
+ end
+
+ describe '#exists?' do
+ let(:states) { %w(active banned) }
+
+ subject { test_model.where(state: states).use_unnested_filters.exists? }
+
+ context 'when there is no record in the database with given filters' do
+ it { is_expected.to be_falsey }
+ end
+
+ context 'when there is a record in the database with given filters' do
+ before do
+ create(:user, state: :active)
+ end
+
+ it { is_expected.to be_truthy }
+ end
+ end
+end
diff --git a/spec/lib/unnested_in_filters/rewriter_spec.rb b/spec/lib/unnested_in_filters/rewriter_spec.rb
new file mode 100644
index 00000000000..f4fff393f28
--- /dev/null
+++ b/spec/lib/unnested_in_filters/rewriter_spec.rb
@@ -0,0 +1,142 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe UnnestedInFilters::Rewriter do
+ let(:rewriter) { described_class.new(relation) }
+
+ before(:all) do
+ User.include(UnnestedInFilters::Dsl)
+ end
+
+ describe '#rewrite?' do
+ subject(:rewrite?) { rewriter.rewrite? }
+
+ context 'when the given relation does not have an `IN` predicate' do
+ let(:relation) { User.where(username: 'user') }
+
+ it { is_expected.to be_falsey }
+ end
+
+ context 'when the given relation has an `IN` predicate' do
+ context 'when there is no index coverage for the used columns' do
+ let(:relation) { User.where(username: %w(user_1 user_2), state: :active) }
+
+ it { is_expected.to be_falsey }
+ end
+
+ context 'when there is an index coverage for the used columns' do
+ let(:relation) { User.where(state: :active, user_type: [:support_bot, :alert_bot]) }
+
+ it { is_expected.to be_truthy }
+
+ context 'when there is an ordering' do
+ let(:relation) { User.where(state: %w(active blocked banned)).order(order).limit(2) }
+
+ context 'when the order is an Arel node' do
+ let(:order) { { user_type: :desc } }
+
+ it { is_expected.to be_truthy }
+ end
+
+ context 'when the order is a Keyset order' do
+ let(:order) do
+ Gitlab::Pagination::Keyset::Order.build([
+ Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
+ attribute_name: 'user_type',
+ order_expression: User.arel_table['user_type'].desc,
+ nullable: :not_nullable,
+ distinct: false
+ )
+ ])
+ end
+
+ it { is_expected.to be_truthy }
+ end
+ end
+ end
+ end
+ end
+
+ describe '#rewrite' do
+ let(:recorded_queries) { ActiveRecord::QueryRecorder.new { rewriter.rewrite.load } }
+ let(:relation) { User.where(state: :active, user_type: %i(support_bot alert_bot)).limit(2) }
+
+ let(:expected_query) do
+ <<~SQL
+ SELECT
+ "users".*
+ FROM
+ unnest('{1,2}'::smallint[]) AS "user_types"("user_type"),
+ LATERAL (
+ SELECT
+ "users".*
+ FROM
+ "users"
+ WHERE
+ "users"."state" = 'active' AND
+ (users."user_type" = "user_types"."user_type")
+ LIMIT 2
+ ) AS users
+ LIMIT 2
+ SQL
+ end
+
+ subject(:issued_query) { recorded_queries.occurrences.each_key.first }
+
+ it 'changes the query' do
+ expect(issued_query.gsub(/\s/, '')).to start_with(expected_query.gsub(/\s/, ''))
+ end
+
+ context 'when there is an order' do
+ let(:relation) { User.where(state: %w(active blocked banned)).order(order).limit(2) }
+ let(:expected_query) do
+ <<~SQL
+ SELECT
+ "users".*
+ FROM
+ unnest('{active,blocked,banned}'::charactervarying[]) AS "states"("state"),
+ LATERAL (
+ SELECT
+ "users".*
+ FROM
+ "users"
+ WHERE
+ (users."state" = "states"."state")
+ ORDER BY
+ "users"."user_type" DESC
+ LIMIT 2
+ ) AS users
+ ORDER BY
+ "users"."user_type" DESC
+ LIMIT 2
+ SQL
+ end
+
+ context 'when the order is an Arel node' do
+ let(:order) { { user_type: :desc } }
+
+ it 'changes the query' do
+ expect(issued_query.gsub(/\s/, '')).to start_with(expected_query.gsub(/\s/, ''))
+ end
+ end
+
+ context 'when the order is a Keyset order' do
+ let(:order) do
+ Gitlab::Pagination::Keyset::Order.build([
+ Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
+ attribute_name: 'user_type',
+ order_expression: User.arel_table['user_type'].desc,
+ nullable: :not_nullable,
+ distinct: false
+ )
+ ])
+ end
+
+ it 'changes the query' do
+ expect(issued_query.gsub(/\s/, '')).to start_with(expected_query.gsub(/\s/, ''))
+ end
+ end
+ end
+ end
+end