summaryrefslogtreecommitdiff
path: root/lib/gitlab/pagination/keyset/order.rb
blob: e8e68a5c4a5292348b3d9b88178f5e9d7189e29a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# frozen_string_literal: true

module Gitlab
  module Pagination
    module Keyset
      # This class is a special ORDER BY clause which is compatible with ActiveRecord. It helps
      # building keyset paginated queries.
      #
      # In ActiveRecord we use the `order()` method which will generate the `ORDER BY X` SQL clause
      #
      # Project.where(active: true).order(id: :asc)
      #
      # # Or
      #
      # Project.where(active: true).order(created_at: :asc, id: desc)
      #
      # Gitlab::Pagination::Keyset::Order class encapsulates more information about the order columns
      # in order to implement keyset pagination in a generic way
      #
      # - Extract values from a record (usually the last item of the previous query)
      # - Build query conditions based on the column configuration
      #
      # Example 1: Order by primary key
      #
      #   # Simple order definition for the primary key as an ActiveRecord scope
      #   scope :id_asc_ordered, -> {
      #     keyset_order = Gitlab::Pagination::Keyset::Order.build([
      #       Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
      #         attribute: :id,
      #         order_expression: Project.arel_table[:id].asc
      #       )
      #     ])
      #
      #     reorder(keyset_order)
      #   }
      #
      #   # ... Later in the application code:
      #
      #   # Compatible with ActiveRecord's `order()` method
      #   page1 = Project.where(active: true).id_asc_ordered.limit(5)
      #   keyset_order = Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(page1)
      #
      #   last_record = page1.last
      #   cursor_values = keyset_order.cursor_attributes_for_node(last_record) # { id: x }
      #
      #   page2 = keyset_order.apply_cursor_conditions(Project.where(active: true).id_asc_ordered, cursor_values).limit(5)
      #
      #   last_record = page2.last
      #   cursor_values = keyset_order.cursor_attributes_for_node(last_record)
      #
      #   page3 = keyset_order.apply_cursor_conditions(Project.where(active: true).id_asc_ordered, cursor_values).limit(5)
      #
      # Example 2: Order by creation time and primary key (primary key is the tie breaker)
      #
      #   scope :created_at_ordered, -> {
      #     keyset_order = Gitlab::Pagination::Keyset::Order.build([
      #       Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
      #         attribute: :created_at,
      #         column_expression: Project.arel_table[:created_at],
      #         order_expression: Project.arel_table[:created_at].asc,
      #         distinct: false, # values in the column are not unique
      #         nullable: :nulls_last # we might see NULL values (bottom)
      #       ),
      #       Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
      #         attribute: :id,
      #         order_expression: Project.arel_table[:id].asc
      #       )
      #     ])
      #
      #     reorder(keyset_order)
      #   }
      #
      class Order < Arel::Nodes::SqlLiteral
        attr_reader :column_definitions

        def initialize(column_definitions:)
          @column_definitions = column_definitions

          super(to_sql_literal(@column_definitions))
        end

        # Tells whether the given ActiveRecord::Relation has keyset ordering
        def self.keyset_aware?(scope)
          scope.order_values.first.is_a?(self) && scope.order_values.one?
        end

        def self.extract_keyset_order_object(scope)
          scope.order_values.first
        end

        def self.build(column_definitions)
          new(column_definitions: column_definitions)
        end

        def cursor_attributes_for_node(node)
          column_definitions.each_with_object({}) do |column_definition, hash|
            field_value = node[column_definition.attribute_name]
            hash[column_definition.attribute_name] = if field_value.is_a?(Time)
                                                       field_value.strftime('%Y-%m-%d %H:%M:%S.%N %Z')
                                                     elsif field_value.nil?
                                                       nil
                                                     else
                                                       field_value.to_s
                                                     end
          end
        end

        # This methods builds the conditions for the keyset pagination
        #
        # Example:
        #
        # |created_at|id|
        # |----------|--|
        # |2020-01-01| 1|
        # |      null| 2|
        # |      null| 3|
        # |2020-02-01| 4|
        #
        # Note: created_at is not distinct and nullable
        # Order `ORDER BY created_at DESC, id DESC`
        #
        # We get the following cursor values from the previous page:
        # { id: 4, created_at: '2020-02-01' }
        #
        # To get the next rows, we need to build the following conditions:
        #
        # (created_at = '2020-02-01' AND id < 4) OR (created_at < '2020-01-01')
        #
        # DESC ordering ensures that NULL values are on top so we don't need conditions for NULL values
        #
        # Another cursor example:
        # { id: 3, created_at: nil }
        #
        # To get the next rows, we need to build the following conditions:
        #
        # (id < 3 AND created_at IS NULL) OR (created_at IS NOT NULL)
        def build_where_values(values)
          return if values.blank?

          verify_incoming_values!(values)

          where_values = []

          reversed_column_definitions = column_definitions.reverse
          reversed_column_definitions.each_with_index do |column_definition, i|
            value = values[column_definition.attribute_name]

            conditions_for_column(column_definition, value).each do |condition|
              column_definitions_after_index = reversed_column_definitions.last(column_definitions.reverse.size - i - 1)

              equal_conditon_for_rest = column_definitions_after_index.map do |definition|
                definition.column_expression.eq(values[definition.attribute_name])
              end

              where_values << Arel::Nodes::Grouping.new(Arel::Nodes::And.new([condition, *equal_conditon_for_rest].compact))
            end
          end

          build_or_query(where_values)
        end

        # rubocop: disable CodeReuse/ActiveRecord
        def apply_cursor_conditions(scope, values = {})
          scope = apply_custom_projections(scope)
          scope.where(build_where_values(values))
        end
        # rubocop: enable CodeReuse/ActiveRecord

        def reversed_order
          self.class.build(column_definitions.map(&:reverse))
        end

        private

        # Adds extra columns to the SELECT clause
        def apply_custom_projections(scope)
          additional_projections = column_definitions.select(&:add_to_projections).map do |column_definition|
            # avoid mutating the original column_expression
            column_definition.column_expression.dup.as(column_definition.attribute_name).to_sql
          end

          scope = scope.select(*scope.arel.projections, *additional_projections) if additional_projections
          scope
        end

        def conditions_for_column(column_definition, value)
          conditions = []
          # Depending on the order, build a query condition fragment for taking the next rows
          if column_definition.distinct? || (!column_definition.distinct? && value.present?)
            conditions << compare_column_with_value(column_definition, value)
          end

          # When the column is nullable, additional conditions for NULL a NOT NULL values are necessary.
          # This depends on the position of the nulls (top or bottom of the resultset).
          if column_definition.nulls_first? && value.blank?
            conditions << column_definition.column_expression.not_eq(nil)
          elsif column_definition.nulls_last? && value.present?
            conditions << column_definition.column_expression.eq(nil)
          end

          conditions
        end

        def compare_column_with_value(column_definition, value)
          if column_definition.descending_order?
            column_definition.column_expression.lt(value)
          else
            column_definition.column_expression.gt(value)
          end
        end

        def build_or_query(expressions)
          or_expression = expressions.reduce { |or_expression, expression| Arel::Nodes::Or.new(or_expression, expression) }

          Arel::Nodes::Grouping.new(or_expression)
        end

        def to_sql_literal(column_definitions)
          column_definitions.map do |column_definition|
            if column_definition.order_expression.respond_to?(:to_sql)
              column_definition.order_expression.to_sql
            else
              column_definition.order_expression.to_s
            end
          end.join(', ')
        end

        def verify_incoming_values!(values)
          value_keys = values.keys.map(&:to_s)
          order_attrbute_names = column_definitions.map(&:attribute_name).map(&:to_s)
          missing_items = order_attrbute_names - value_keys
          extra_items = value_keys - order_attrbute_names

          if missing_items.any? || extra_items.any?
            error_text = ['Incorrect cursor values were given']

            error_text << "Extra items: #{extra_items.join(', ')}" if extra_items.any?
            error_text << "Missing items: #{missing_items.join(', ')}" if missing_items.any?

            error_text.compact

            raise error_text.join('. ')
          end
        end
      end
    end
  end
end