summaryrefslogtreecommitdiff
path: root/lib/gitlab/pagination/keyset/order.rb
blob: 1a00692bdbe5322afee82a46736de1a7bc6a6b75 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# frozen_string_literal: true

module Gitlab
  module Pagination
    module Keyset
      # This class is a special ORDER BY clause which is compatible with ActiveRecord. It helps
      # building keyset paginated queries.
      #
      # In ActiveRecord we use the `order()` method which will generate the `ORDER BY X` SQL clause
      #
      # Project.where(active: true).order(id: :asc)
      #
      # # Or
      #
      # Project.where(active: true).order(created_at: :asc, id: desc)
      #
      # Gitlab::Pagination::Keyset::Order class encapsulates more information about the order columns
      # in order to implement keyset pagination in a generic way
      #
      # - Extract values from a record (usually the last item of the previous query)
      # - Build query conditions based on the column configuration
      #
      # Example 1: Order by primary key
      #
      #   # Simple order definition for the primary key as an ActiveRecord scope
      #   scope :id_asc_ordered, -> {
      #     keyset_order = Gitlab::Pagination::Keyset::Order.build([
      #       Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
      #         attribute: :id,
      #         order_expression: Project.arel_table[:id].asc
      #       )
      #     ])
      #
      #     reorder(keyset_order)
      #   }
      #
      #   # ... Later in the application code:
      #
      #   # Compatible with ActiveRecord's `order()` method
      #   page1 = Project.where(active: true).id_asc_ordered.limit(5)
      #   keyset_order = Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(page1)
      #
      #   last_record = page1.last
      #   cursor_values = keyset_order.cursor_attributes_for_node(last_record) # { id: x }
      #
      #   page2 = keyset_order.apply_cursor_conditions(Project.where(active: true).id_asc_ordered, cursor_values).limit(5)
      #
      #   last_record = page2.last
      #   cursor_values = keyset_order.cursor_attributes_for_node(last_record)
      #
      #   page3 = keyset_order.apply_cursor_conditions(Project.where(active: true).id_asc_ordered, cursor_values).limit(5)
      #
      # Example 2: Order by creation time and primary key (primary key is the tie breaker)
      #
      #   scope :created_at_ordered, -> {
      #     keyset_order = Gitlab::Pagination::Keyset::Order.build([
      #       Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
      #         attribute_name: :created_at,
      #         column_expression: Project.arel_table[:created_at],
      #         order_expression: Project.arel_table[:created_at].asc,
      #         distinct: false, # values in the column are not unique
      #         nullable: :nulls_last # we might see NULL values (bottom)
      #       ),
      #       Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
      #         attribute_name: :id,
      #         order_expression: Project.arel_table[:id].asc
      #       )
      #     ])
      #
      #     reorder(keyset_order)
      #   }
      #
      class Order < Arel::Nodes::SqlLiteral
        attr_reader :column_definitions

        def initialize(column_definitions:)
          @column_definitions = column_definitions

          super(to_sql_literal(@column_definitions))
        end

        # Tells whether the given ActiveRecord::Relation has keyset ordering
        def self.keyset_aware?(scope)
          scope.order_values.first.is_a?(self) && scope.order_values.one?
        end

        def self.extract_keyset_order_object(scope)
          scope.order_values.first
        end

        def self.build(column_definitions)
          new(column_definitions: column_definitions)
        end

        def cursor_attributes_for_node(node)
          column_definitions.each_with_object({}.with_indifferent_access) do |column_definition, hash|
            field_value = node[column_definition.attribute_name]
            hash[column_definition.attribute_name] = if field_value.is_a?(Time)
                                                       field_value.strftime('%Y-%m-%d %H:%M:%S.%N %Z')
                                                     elsif field_value.nil?
                                                       nil
                                                     else
                                                       field_value.to_s
                                                     end
          end
        end

        # This methods builds the conditions for the keyset pagination
        #
        # Example:
        #
        # |created_at|id|
        # |----------|--|
        # |2020-01-01| 1|
        # |      null| 2|
        # |      null| 3|
        # |2020-02-01| 4|
        #
        # Note: created_at is not distinct and nullable
        # Order `ORDER BY created_at DESC, id DESC`
        #
        # We get the following cursor values from the previous page:
        # { id: 4, created_at: '2020-02-01' }
        #
        # To get the next rows, we need to build the following conditions:
        #
        # (created_at = '2020-02-01' AND id < 4) OR (created_at < '2020-01-01')
        #
        # DESC ordering ensures that NULL values are on top so we don't need conditions for NULL values
        #
        # Another cursor example:
        # { id: 3, created_at: nil }
        #
        # To get the next rows, we need to build the following conditions:
        #
        # (id < 3 AND created_at IS NULL) OR (created_at IS NOT NULL)
        def build_where_values(values)
          return [] if values.blank?

          verify_incoming_values!(values)

          return use_composite_row_comparison(values) if composite_row_comparison_possible?

          column_definitions
            .map { ColumnConditionBuilder.new(_1, values[_1.attribute_name]) }
            .reverse
            .reduce([]) { |where_conditions, column| column.where_conditions(where_conditions) }
        end

        def where_values_with_or_query(values)
          build_or_query(build_where_values(values.with_indifferent_access))
        end

        # rubocop: disable CodeReuse/ActiveRecord
        def apply_cursor_conditions(scope, values = {}, options = { use_union_optimization: false, in_operator_optimization_options: nil })
          values ||= {}
          transformed_values = values.with_indifferent_access
          scope = apply_custom_projections(scope)

          where_values = build_where_values(transformed_values)

          if options[:use_union_optimization] && where_values.size > 1
            build_union_query(scope, where_values).reorder(self)
          elsif options[:in_operator_optimization_options]
            opts = options[:in_operator_optimization_options]

            Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder.new(
              **{
                scope: scope.reorder(self),
                values: values
              }.merge(opts)
            ).execute
          else
            scope.where(build_or_query(where_values)) # rubocop: disable CodeReuse/ActiveRecord
          end
        end
        # rubocop: enable CodeReuse/ActiveRecord

        def reversed_order
          self.class.build(column_definitions.map(&:reverse))
        end

        alias_method :to_sql, :to_s

        private

        def composite_row_comparison_possible?
          !column_definitions.one? &&
            column_definitions.all?(&:not_nullable?) &&
            column_definitions.map(&:order_direction).uniq.one? # all columns uses the same order direction
        end

        # composite row comparison works with NOT NULL columns and may use only one index scan given a proper index setup
        # Example: (created_at, id) > ('2012-09-18 01:40:01+00', 15)
        def use_composite_row_comparison(values)
          columns = Arel::Nodes::Grouping.new(column_definitions.map(&:column_expression))
          values = Arel::Nodes::Grouping.new(column_definitions.map do |column_definition|
            value = values[column_definition.attribute_name]
            build_quoted(value, column_definition.column_expression)
          end)

          if column_definitions.first.ascending_order?
            [columns.gt(values)]
          else
            [columns.lt(values)]
          end
        end

        def build_quoted(value, column_expression)
          return value if value.instance_of?(Arel::Nodes::SqlLiteral)

          Arel::Nodes.build_quoted(value, column_expression)
        end

        # Adds extra columns to the SELECT clause
        def apply_custom_projections(scope)
          additional_projections = column_definitions.select(&:add_to_projections).map do |column_definition|
            # avoid mutating the original column_expression
            column_definition.column_expression.dup.as(column_definition.attribute_name).to_sql
          end

          scope = scope.reselect(*scope.arel.projections, *additional_projections) unless additional_projections.blank?
          scope
        end

        def build_or_query(expressions)
          return [] if expressions.blank?

          or_expression = expressions.reduce { |or_expression, expression| Arel::Nodes::Or.new(or_expression, expression) }
          Arel::Nodes::Grouping.new(or_expression)
        end

        def build_union_query(scope, where_values)
          scopes = where_values.map do |where_value|
            scope.dup.where(where_value).reorder(self) # rubocop: disable CodeReuse/ActiveRecord
          end
          scope.model.from_union(scopes, remove_duplicates: false, remove_order: false)
        end

        def to_sql_literal(column_definitions)
          column_definitions.map do |column_definition|
            if column_definition.order_expression.respond_to?(:to_sql)
              column_definition.order_expression.to_sql
            else
              column_definition.order_expression.to_s
            end
          end.join(', ')
        end

        def verify_incoming_values!(values)
          value_keys = values.keys.map(&:to_s)
          order_attrbute_names = column_definitions.map(&:attribute_name).map(&:to_s)
          missing_items = order_attrbute_names - value_keys
          extra_items = value_keys - order_attrbute_names

          if missing_items.any? || extra_items.any?
            error_text = ['Incorrect cursor values were given']

            error_text << "Extra items: #{extra_items.join(', ')}" if extra_items.any?
            error_text << "Missing items: #{missing_items.join(', ')}" if missing_items.any?

            error_text.compact

            raise error_text.join('. ')
          end
        end
      end
    end
  end
end