From e2ee1eec50aa8df8543d7ecc585ec0ba5ee544ac Mon Sep 17 00:00:00 2001 From: GitLab Bot Date: Wed, 8 Apr 2020 03:09:31 +0000 Subject: Add latest changes from gitlab-org/gitlab@master --- lib/gitlab/graphql/connections.rb | 22 ---- .../externally_paginated_array_connection.rb | 35 ------ .../connections/filterable_array_connection.rb | 17 --- .../keyset/conditions/base_condition.rb | 55 -------- .../keyset/conditions/not_null_condition.rb | 57 --------- .../keyset/conditions/null_condition.rb | 41 ------ .../graphql/connections/keyset/connection.rb | 139 --------------------- .../graphql/connections/keyset/order_info.rb | 95 -------------- .../graphql/connections/keyset/query_builder.rb | 67 ---------- lib/gitlab/graphql/pagination/connections.rb | 23 ++++ .../externally_paginated_array_connection.rb | 29 +++++ .../pagination/filterable_array_connection.rb | 17 +++ .../pagination/keyset/conditions/base_condition.rb | 55 ++++++++ .../keyset/conditions/not_null_condition.rb | 57 +++++++++ .../pagination/keyset/conditions/null_condition.rb | 41 ++++++ lib/gitlab/graphql/pagination/keyset/connection.rb | 139 +++++++++++++++++++++ lib/gitlab/graphql/pagination/keyset/order_info.rb | 95 ++++++++++++++ .../graphql/pagination/keyset/query_builder.rb | 67 ++++++++++ lib/gitlab/import_export/group/tree_restorer.rb | 1 + lib/gitlab/import_export/json/legacy_reader.rb | 19 +-- lib/gitlab/import_export/json/ndjson_reader.rb | 61 +++++++++ lib/gitlab/import_export/project/tree_restorer.rb | 28 ++++- lib/gitlab/import_export/relation_tree_restorer.rb | 2 +- 23 files changed, 620 insertions(+), 542 deletions(-) delete mode 100644 lib/gitlab/graphql/connections.rb delete mode 100644 lib/gitlab/graphql/connections/externally_paginated_array_connection.rb delete mode 100644 lib/gitlab/graphql/connections/filterable_array_connection.rb delete mode 100644 lib/gitlab/graphql/connections/keyset/conditions/base_condition.rb delete mode 100644 lib/gitlab/graphql/connections/keyset/conditions/not_null_condition.rb delete mode 100644 lib/gitlab/graphql/connections/keyset/conditions/null_condition.rb delete mode 100644 lib/gitlab/graphql/connections/keyset/connection.rb delete mode 100644 lib/gitlab/graphql/connections/keyset/order_info.rb delete mode 100644 lib/gitlab/graphql/connections/keyset/query_builder.rb create mode 100644 lib/gitlab/graphql/pagination/connections.rb create mode 100644 lib/gitlab/graphql/pagination/externally_paginated_array_connection.rb create mode 100644 lib/gitlab/graphql/pagination/filterable_array_connection.rb create mode 100644 lib/gitlab/graphql/pagination/keyset/conditions/base_condition.rb create mode 100644 lib/gitlab/graphql/pagination/keyset/conditions/not_null_condition.rb create mode 100644 lib/gitlab/graphql/pagination/keyset/conditions/null_condition.rb create mode 100644 lib/gitlab/graphql/pagination/keyset/connection.rb create mode 100644 lib/gitlab/graphql/pagination/keyset/order_info.rb create mode 100644 lib/gitlab/graphql/pagination/keyset/query_builder.rb create mode 100644 lib/gitlab/import_export/json/ndjson_reader.rb (limited to 'lib') diff --git a/lib/gitlab/graphql/connections.rb b/lib/gitlab/graphql/connections.rb deleted file mode 100644 index 08d5cd0b72e..00000000000 --- a/lib/gitlab/graphql/connections.rb +++ /dev/null @@ -1,22 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Graphql - module Connections - def self.use(_schema) - GraphQL::Relay::BaseConnection.register_connection_implementation( - ActiveRecord::Relation, - Gitlab::Graphql::Connections::Keyset::Connection - ) - GraphQL::Relay::BaseConnection.register_connection_implementation( - Gitlab::Graphql::FilterableArray, - Gitlab::Graphql::Connections::FilterableArrayConnection - ) - GraphQL::Relay::BaseConnection.register_connection_implementation( - Gitlab::Graphql::ExternallyPaginatedArray, - Gitlab::Graphql::Connections::ExternallyPaginatedArrayConnection - ) - end - end - end -end diff --git a/lib/gitlab/graphql/connections/externally_paginated_array_connection.rb b/lib/gitlab/graphql/connections/externally_paginated_array_connection.rb deleted file mode 100644 index f0861260691..00000000000 --- a/lib/gitlab/graphql/connections/externally_paginated_array_connection.rb +++ /dev/null @@ -1,35 +0,0 @@ -# frozen_string_literal: true - -# Make a customized connection type -module Gitlab - module Graphql - module Connections - class ExternallyPaginatedArrayConnection < GraphQL::Relay::ArrayConnection - # As the pagination happens externally - # we just return all the nodes here. - def sliced_nodes - @nodes - end - - def start_cursor - nodes.previous_cursor - end - - def end_cursor - nodes.next_cursor - end - - def next_page? - end_cursor.present? - end - - def previous_page? - start_cursor.present? - end - - alias_method :has_next_page, :next_page? - alias_method :has_previous_page, :previous_page? - end - end - end -end diff --git a/lib/gitlab/graphql/connections/filterable_array_connection.rb b/lib/gitlab/graphql/connections/filterable_array_connection.rb deleted file mode 100644 index 800f2c949c6..00000000000 --- a/lib/gitlab/graphql/connections/filterable_array_connection.rb +++ /dev/null @@ -1,17 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Graphql - module Connections - # FilterableArrayConnection is useful especially for lazy-loaded values. - # It allows us to call a callback only on the slice of array being - # rendered in the "after loaded" phase. For example we can check - # permissions only on a small subset of items. - class FilterableArrayConnection < GraphQL::Relay::ArrayConnection - def paged_nodes - @filtered_nodes ||= nodes.filter_callback.call(super) - end - end - end - end -end diff --git a/lib/gitlab/graphql/connections/keyset/conditions/base_condition.rb b/lib/gitlab/graphql/connections/keyset/conditions/base_condition.rb deleted file mode 100644 index 26c9d77a8df..00000000000 --- a/lib/gitlab/graphql/connections/keyset/conditions/base_condition.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Graphql - module Connections - module Keyset - module Conditions - class BaseCondition - # @param [Arel::Table] arel_table for the relation being ordered - # @param [Array] order_list of extracted orderings - # @param [Array] values from the decoded cursor - # @param [Array] operators determining sort comparison - # @param [Symbol] before_or_after indicates whether we want - # items :before the cursor or :after the cursor - def initialize(arel_table, order_list, values, operators, before_or_after) - @arel_table, @order_list, @values, @operators, @before_or_after = arel_table, order_list, values, operators, before_or_after - - @before_or_after = :after unless [:after, :before].include?(@before_or_after) - end - - def build - raise NotImplementedError - end - - private - - attr_reader :arel_table, :order_list, :values, :operators, :before_or_after - - def table_condition(order_info, value, operator) - if order_info.named_function - target = order_info.named_function - value = value&.downcase if target&.name&.downcase == 'lower' - else - target = arel_table[order_info.attribute_name] - end - - case operator - when '>' - target.gt(value) - when '<' - target.lt(value) - when '=' - target.eq(value) - when 'is_null' - target.eq(nil) - when 'is_not_null' - target.not_eq(nil) - end - end - end - end - end - end - end -end diff --git a/lib/gitlab/graphql/connections/keyset/conditions/not_null_condition.rb b/lib/gitlab/graphql/connections/keyset/conditions/not_null_condition.rb deleted file mode 100644 index 3239d27c0cd..00000000000 --- a/lib/gitlab/graphql/connections/keyset/conditions/not_null_condition.rb +++ /dev/null @@ -1,57 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Graphql - module Connections - module Keyset - module Conditions - class NotNullCondition < BaseCondition - def build - conditions = [first_attribute_condition] - - # If there is only one order field, we can assume it - # does not contain NULLs, and don't need additional - # conditions - unless order_list.count == 1 - conditions << [second_attribute_condition, final_condition] - end - - conditions.join - end - - private - - # ex: "(relative_position > 23)" - def first_attribute_condition - <<~SQL - (#{table_condition(order_list.first, values.first, operators.first).to_sql}) - SQL - end - - # ex: " OR (relative_position = 23 AND id > 500)" - def second_attribute_condition - condition = <<~SQL - OR ( - #{table_condition(order_list.first, values.first, '=').to_sql} - AND - #{table_condition(order_list[1], values[1], operators[1]).to_sql} - ) - SQL - - condition - end - - # ex: " OR (relative_position IS NULL)" - def final_condition - if before_or_after == :after - <<~SQL - OR (#{table_condition(order_list.first, nil, 'is_null').to_sql}) - SQL - end - end - end - end - end - end - end -end diff --git a/lib/gitlab/graphql/connections/keyset/conditions/null_condition.rb b/lib/gitlab/graphql/connections/keyset/conditions/null_condition.rb deleted file mode 100644 index 18ea0692e2c..00000000000 --- a/lib/gitlab/graphql/connections/keyset/conditions/null_condition.rb +++ /dev/null @@ -1,41 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Graphql - module Connections - module Keyset - module Conditions - class NullCondition < BaseCondition - def build - [first_attribute_condition, final_condition].join - end - - private - - # ex: "(relative_position IS NULL AND id > 500)" - def first_attribute_condition - condition = <<~SQL - ( - #{table_condition(order_list.first, nil, 'is_null').to_sql} - AND - #{table_condition(order_list[1], values[1], operators[1]).to_sql} - ) - SQL - - condition - end - - # ex: " OR (relative_position IS NOT NULL)" - def final_condition - if before_or_after == :before - <<~SQL - OR (#{table_condition(order_list.first, nil, 'is_not_null').to_sql}) - SQL - end - end - end - end - end - end - end -end diff --git a/lib/gitlab/graphql/connections/keyset/connection.rb b/lib/gitlab/graphql/connections/keyset/connection.rb deleted file mode 100644 index 5de075f2f7a..00000000000 --- a/lib/gitlab/graphql/connections/keyset/connection.rb +++ /dev/null @@ -1,139 +0,0 @@ -# frozen_string_literal: true - -# Keyset::Connection provides cursor based pagination, to avoid using OFFSET. -# It basically sorts / filters using WHERE sorting_value > cursor. -# We do this for performance reasons (https://gitlab.com/gitlab-org/gitlab-foss/issues/45756), -# as well as for having stable pagination -# https://graphql-ruby.org/pro/cursors.html#whats-the-difference -# https://coderwall.com/p/lkcaag/pagination-you-re-probably-doing-it-wrong -# -# It currently supports sorting on two columns, but the last column must -# be the primary key. If it's not already included, an order on the -# primary key will be added automatically, like `order(id: :desc)` -# -# Issue.order(created_at: :asc).order(:id) -# Issue.order(due_date: :asc) -# -# You can also use `Gitlab::Database.nulls_last_order`: -# -# Issue.reorder(::Gitlab::Database.nulls_last_order('due_date', 'DESC')) -# -# It will tolerate non-attribute ordering, but only attributes determine the cursor. -# For example, this is legitimate: -# -# Issue.order('issues.due_date IS NULL').order(due_date: :asc).order(:id) -# -# but anything more complex has a chance of not working. -# -module Gitlab - module Graphql - module Connections - module Keyset - class Connection < GraphQL::Relay::BaseConnection - include Gitlab::Utils::StrongMemoize - - def cursor_from_node(node) - encoded_json_from_ordering(node) - end - - def sliced_nodes - @sliced_nodes ||= - begin - OrderInfo.validate_ordering(ordered_nodes, order_list) - - sliced = ordered_nodes - sliced = slice_nodes(sliced, before, :before) if before.present? - sliced = slice_nodes(sliced, after, :after) if after.present? - - sliced - end - end - - def paged_nodes - # These are the nodes that will be loaded into memory for rendering - # So we're ok loading them into memory here as that's bound to happen - # anyway. Having them ready means we can modify the result while - # rendering the fields. - @paged_nodes ||= load_paged_nodes.to_a - end - - private - - def load_paged_nodes - if first && last - raise Gitlab::Graphql::Errors::ArgumentError.new("Can only provide either `first` or `last`, not both") - end - - if last - sliced_nodes.last(limit_value) - else - sliced_nodes.limit(limit_value) # rubocop: disable CodeReuse/ActiveRecord - end - end - - # rubocop: disable CodeReuse/ActiveRecord - def slice_nodes(sliced, encoded_cursor, before_or_after) - decoded_cursor = ordering_from_encoded_json(encoded_cursor) - builder = QueryBuilder.new(arel_table, order_list, decoded_cursor, before_or_after) - ordering = builder.conditions - - sliced.where(*ordering).where.not(id: decoded_cursor['id']) - end - # rubocop: enable CodeReuse/ActiveRecord - - def limit_value - @limit_value ||= [first, last, max_page_size].compact.min - end - - def ordered_nodes - strong_memoize(:order_nodes) do - unless nodes.primary_key.present? - raise ArgumentError.new('Relation must have a primary key') - end - - list = OrderInfo.build_order_list(nodes) - - # ensure there is a primary key ordering - if list&.last&.attribute_name != nodes.primary_key - nodes.order(arel_table[nodes.primary_key].desc) # rubocop: disable CodeReuse/ActiveRecord - else - nodes - end - end - end - - def order_list - strong_memoize(:order_list) do - OrderInfo.build_order_list(ordered_nodes) - end - end - - def arel_table - nodes.arel_table - end - - # Storing the current order values in the cursor allows us to - # make an intelligent decision on handling NULL values. - # Otherwise we would either need to fetch the record first, - # or fetch it in the SQL, significantly complicating it. - def encoded_json_from_ordering(node) - ordering = { 'id' => node[:id].to_s } - - order_list.each do |field| - field_name = field.attribute_name - ordering[field_name] = node[field_name].to_s - end - - encode(ordering.to_json) - end - - def ordering_from_encoded_json(cursor) - JSON.parse(decode(cursor)) - rescue JSON::ParserError - raise Gitlab::Graphql::Errors::ArgumentError, "Please provide a valid cursor" - end - end - end - end - end -end diff --git a/lib/gitlab/graphql/connections/keyset/order_info.rb b/lib/gitlab/graphql/connections/keyset/order_info.rb deleted file mode 100644 index 7f61bf937b4..00000000000 --- a/lib/gitlab/graphql/connections/keyset/order_info.rb +++ /dev/null @@ -1,95 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Graphql - module Connections - module Keyset - class OrderInfo - attr_reader :attribute_name, :sort_direction, :named_function - - def initialize(order_value) - @attribute_name, @sort_direction, @named_function = - if order_value.is_a?(String) - extract_nulls_last_order(order_value) - else - extract_attribute_values(order_value) - end - end - - def operator_for(before_or_after) - case before_or_after - when :before - sort_direction == :asc ? '<' : '>' - when :after - sort_direction == :asc ? '>' : '<' - end - end - - # Only allow specific node types - def self.build_order_list(relation) - order_list = relation.order_values.select do |value| - supported_order_value?(value) - end - - order_list.map { |info| OrderInfo.new(info) } - end - - def self.validate_ordering(relation, order_list) - if order_list.empty? - raise ArgumentError.new('A minimum of 1 ordering field is required') - end - - if order_list.count > 2 - raise ArgumentError.new('A maximum of 2 ordering fields are allowed') - end - - # make sure the last ordering field is non-nullable - attribute_name = order_list.last&.attribute_name - - if relation.columns_hash[attribute_name].null - raise ArgumentError.new("Column `#{attribute_name}` must not allow NULL") - end - - if order_list.last.attribute_name != relation.primary_key - raise ArgumentError.new("Last ordering field must be the primary key, `#{relation.primary_key}`") - end - end - - def self.supported_order_value?(order_value) - return true if order_value.is_a?(Arel::Nodes::Ascending) || order_value.is_a?(Arel::Nodes::Descending) - return false unless order_value.is_a?(String) - - tokens = order_value.downcase.split - - tokens.last(2) == %w(nulls last) && tokens.count == 4 - end - - private - - def extract_nulls_last_order(order_value) - tokens = order_value.downcase.split - - [tokens.first, (tokens[1] == 'asc' ? :asc : :desc), nil] - end - - def extract_attribute_values(order_value) - named = nil - name = if ordering_by_lower?(order_value) - named = order_value.expr - named.expressions[0].name.to_s - else - order_value.expr.name - end - - [name, order_value.direction, named] - end - - # determine if ordering using LOWER, eg. "ORDER BY LOWER(boards.name)" - def ordering_by_lower?(order_value) - order_value.expr.is_a?(Arel::Nodes::NamedFunction) && order_value.expr&.name&.downcase == 'lower' - end - end - end - end - end -end diff --git a/lib/gitlab/graphql/connections/keyset/query_builder.rb b/lib/gitlab/graphql/connections/keyset/query_builder.rb deleted file mode 100644 index fe85898f638..00000000000 --- a/lib/gitlab/graphql/connections/keyset/query_builder.rb +++ /dev/null @@ -1,67 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Graphql - module Connections - module Keyset - class QueryBuilder - def initialize(arel_table, order_list, decoded_cursor, before_or_after) - @arel_table, @order_list, @decoded_cursor, @before_or_after = arel_table, order_list, decoded_cursor, before_or_after - - if order_list.empty? - raise ArgumentError.new('No ordering scopes have been supplied') - end - end - - # Based on whether the main field we're ordering on is NULL in the - # cursor, we can more easily target our query condition. - # We assume that the last ordering field is unique, meaning - # it will not contain NULLs. - # We currently only support two ordering fields. - # - # Example of the conditions for - # relation: Issue.order(relative_position: :asc).order(id: :asc) - # after cursor: relative_position: 1500, id: 500 - # - # when cursor[relative_position] is not NULL - # - # ("issues"."relative_position" > 1500) - # OR ( - # "issues"."relative_position" = 1500 - # AND - # "issues"."id" > 500 - # ) - # OR ("issues"."relative_position" IS NULL) - # - # when cursor[relative_position] is NULL - # - # "issues"."relative_position" IS NULL - # AND - # "issues"."id" > 500 - # - def conditions - attr_values = order_list.map { |field| decoded_cursor[field.attribute_name] } - - if order_list.count == 1 && attr_values.first.nil? - raise Gitlab::Graphql::Errors::ArgumentError.new('Before/after cursor invalid: `nil` was provided as only sortable value') - end - - if order_list.count == 1 || attr_values.first.present? - Keyset::Conditions::NotNullCondition.new(arel_table, order_list, attr_values, operators, before_or_after).build - else - Keyset::Conditions::NullCondition.new(arel_table, order_list, attr_values, operators, before_or_after).build - end - end - - private - - attr_reader :arel_table, :order_list, :decoded_cursor, :before_or_after - - def operators - order_list.map { |field| field.operator_for(before_or_after) } - end - end - end - end - end -end diff --git a/lib/gitlab/graphql/pagination/connections.rb b/lib/gitlab/graphql/pagination/connections.rb new file mode 100644 index 00000000000..febdc938317 --- /dev/null +++ b/lib/gitlab/graphql/pagination/connections.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Gitlab + module Graphql + module Pagination + module Connections + def self.use(schema) + schema.connections.add( + ActiveRecord::Relation, + Gitlab::Graphql::Pagination::Keyset::Connection) + + schema.connections.add( + Gitlab::Graphql::FilterableArray, + Gitlab::Graphql::Pagination::FilterableArrayConnection) + + schema.connections.add( + Gitlab::Graphql::ExternallyPaginatedArray, + Gitlab::Graphql::Pagination::ExternallyPaginatedArrayConnection) + end + end + end + end +end diff --git a/lib/gitlab/graphql/pagination/externally_paginated_array_connection.rb b/lib/gitlab/graphql/pagination/externally_paginated_array_connection.rb new file mode 100644 index 00000000000..1f01dd07571 --- /dev/null +++ b/lib/gitlab/graphql/pagination/externally_paginated_array_connection.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +# Make a customized connection type +module Gitlab + module Graphql + module Pagination + class ExternallyPaginatedArrayConnection < GraphQL::Pagination::ArrayConnection + def start_cursor + items.previous_cursor + end + + def end_cursor + items.next_cursor + end + + def next_page? + end_cursor.present? + end + + def previous_page? + start_cursor.present? + end + + alias_method :has_next_page, :next_page? + alias_method :has_previous_page, :previous_page? + end + end + end +end diff --git a/lib/gitlab/graphql/pagination/filterable_array_connection.rb b/lib/gitlab/graphql/pagination/filterable_array_connection.rb new file mode 100644 index 00000000000..4a76cd5fb00 --- /dev/null +++ b/lib/gitlab/graphql/pagination/filterable_array_connection.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Gitlab + module Graphql + module Pagination + # FilterableArrayConnection is useful especially for lazy-loaded values. + # It allows us to call a callback only on the slice of array being + # rendered in the "after loaded" phase. For example we can check + # permissions only on a small subset of items. + class FilterableArrayConnection < GraphQL::Pagination::ArrayConnection + def nodes + @nodes ||= items.filter_callback.call(super) + end + end + end + end +end diff --git a/lib/gitlab/graphql/pagination/keyset/conditions/base_condition.rb b/lib/gitlab/graphql/pagination/keyset/conditions/base_condition.rb new file mode 100644 index 00000000000..afea7c602be --- /dev/null +++ b/lib/gitlab/graphql/pagination/keyset/conditions/base_condition.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Gitlab + module Graphql + module Pagination + module Keyset + module Conditions + class BaseCondition + # @param [Arel::Table] arel_table for the relation being ordered + # @param [Array] order_list of extracted orderings + # @param [Array] values from the decoded cursor + # @param [Array] operators determining sort comparison + # @param [Symbol] before_or_after indicates whether we want + # items :before the cursor or :after the cursor + def initialize(arel_table, order_list, values, operators, before_or_after) + @arel_table, @order_list, @values, @operators, @before_or_after = arel_table, order_list, values, operators, before_or_after + + @before_or_after = :after unless [:after, :before].include?(@before_or_after) + end + + def build + raise NotImplementedError + end + + private + + attr_reader :arel_table, :order_list, :values, :operators, :before_or_after + + def table_condition(order_info, value, operator) + if order_info.named_function + target = order_info.named_function + value = value&.downcase if target&.name&.downcase == 'lower' + else + target = arel_table[order_info.attribute_name] + end + + case operator + when '>' + target.gt(value) + when '<' + target.lt(value) + when '=' + target.eq(value) + when 'is_null' + target.eq(nil) + when 'is_not_null' + target.not_eq(nil) + end + end + end + end + end + end + end +end diff --git a/lib/gitlab/graphql/pagination/keyset/conditions/not_null_condition.rb b/lib/gitlab/graphql/pagination/keyset/conditions/not_null_condition.rb new file mode 100644 index 00000000000..3164598b7b9 --- /dev/null +++ b/lib/gitlab/graphql/pagination/keyset/conditions/not_null_condition.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +module Gitlab + module Graphql + module Pagination + module Keyset + module Conditions + class NotNullCondition < BaseCondition + def build + conditions = [first_attribute_condition] + + # If there is only one order field, we can assume it + # does not contain NULLs, and don't need additional + # conditions + unless order_list.count == 1 + conditions << [second_attribute_condition, final_condition] + end + + conditions.join + end + + private + + # ex: "(relative_position > 23)" + def first_attribute_condition + <<~SQL + (#{table_condition(order_list.first, values.first, operators.first).to_sql}) + SQL + end + + # ex: " OR (relative_position = 23 AND id > 500)" + def second_attribute_condition + condition = <<~SQL + OR ( + #{table_condition(order_list.first, values.first, '=').to_sql} + AND + #{table_condition(order_list[1], values[1], operators[1]).to_sql} + ) + SQL + + condition + end + + # ex: " OR (relative_position IS NULL)" + def final_condition + if before_or_after == :after + <<~SQL + OR (#{table_condition(order_list.first, nil, 'is_null').to_sql}) + SQL + end + end + end + end + end + end + end +end diff --git a/lib/gitlab/graphql/pagination/keyset/conditions/null_condition.rb b/lib/gitlab/graphql/pagination/keyset/conditions/null_condition.rb new file mode 100644 index 00000000000..fa25181d663 --- /dev/null +++ b/lib/gitlab/graphql/pagination/keyset/conditions/null_condition.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +module Gitlab + module Graphql + module Pagination + module Keyset + module Conditions + class NullCondition < BaseCondition + def build + [first_attribute_condition, final_condition].join + end + + private + + # ex: "(relative_position IS NULL AND id > 500)" + def first_attribute_condition + condition = <<~SQL + ( + #{table_condition(order_list.first, nil, 'is_null').to_sql} + AND + #{table_condition(order_list[1], values[1], operators[1]).to_sql} + ) + SQL + + condition + end + + # ex: " OR (relative_position IS NOT NULL)" + def final_condition + if before_or_after == :before + <<~SQL + OR (#{table_condition(order_list.first, nil, 'is_not_null').to_sql}) + SQL + end + end + end + end + end + end + end +end diff --git a/lib/gitlab/graphql/pagination/keyset/connection.rb b/lib/gitlab/graphql/pagination/keyset/connection.rb new file mode 100644 index 00000000000..5466924a794 --- /dev/null +++ b/lib/gitlab/graphql/pagination/keyset/connection.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +# Keyset::Connection provides cursor based pagination, to avoid using OFFSET. +# It basically sorts / filters using WHERE sorting_value > cursor. +# We do this for performance reasons (https://gitlab.com/gitlab-org/gitlab-foss/issues/45756), +# as well as for having stable pagination +# https://graphql-ruby.org/pro/cursors.html#whats-the-difference +# https://coderwall.com/p/lkcaag/pagination-you-re-probably-doing-it-wrong +# +# It currently supports sorting on two columns, but the last column must +# be the primary key. If it's not already included, an order on the +# primary key will be added automatically, like `order(id: :desc)` +# +# Issue.order(created_at: :asc).order(:id) +# Issue.order(due_date: :asc) +# +# You can also use `Gitlab::Database.nulls_last_order`: +# +# Issue.reorder(::Gitlab::Database.nulls_last_order('due_date', 'DESC')) +# +# It will tolerate non-attribute ordering, but only attributes determine the cursor. +# For example, this is legitimate: +# +# Issue.order('issues.due_date IS NULL').order(due_date: :asc).order(:id) +# +# but anything more complex has a chance of not working. +# +module Gitlab + module Graphql + module Pagination + module Keyset + class Connection < GraphQL::Pagination::ActiveRecordRelationConnection + include Gitlab::Utils::StrongMemoize + + def cursor_for(node) + encoded_json_from_ordering(node) + end + + def sliced_nodes + @sliced_nodes ||= + begin + OrderInfo.validate_ordering(ordered_items, order_list) + + sliced = ordered_items + sliced = slice_nodes(sliced, before, :before) if before.present? + sliced = slice_nodes(sliced, after, :after) if after.present? + + sliced + end + end + + def nodes + # These are the nodes that will be loaded into memory for rendering + # So we're ok loading them into memory here as that's bound to happen + # anyway. Having them ready means we can modify the result while + # rendering the fields. + @nodes ||= load_paged_nodes.to_a + end + + private + + def load_paged_nodes + if first && last + raise Gitlab::Graphql::Errors::ArgumentError.new("Can only provide either `first` or `last`, not both") + end + + if last + sliced_nodes.last(limit_value) + else + sliced_nodes.limit(limit_value) # rubocop: disable CodeReuse/ActiveRecord + end + end + + # rubocop: disable CodeReuse/ActiveRecord + def slice_nodes(sliced, encoded_cursor, before_or_after) + decoded_cursor = ordering_from_encoded_json(encoded_cursor) + builder = QueryBuilder.new(arel_table, order_list, decoded_cursor, before_or_after) + ordering = builder.conditions + + sliced.where(*ordering).where.not(id: decoded_cursor['id']) + end + # rubocop: enable CodeReuse/ActiveRecord + + def limit_value + @limit_value ||= [first, last, max_page_size].compact.min + end + + def ordered_items + strong_memoize(:ordered_items) do + unless items.primary_key.present? + raise ArgumentError.new('Relation must have a primary key') + end + + list = OrderInfo.build_order_list(items) + + # ensure there is a primary key ordering + if list&.last&.attribute_name != items.primary_key + items.order(arel_table[items.primary_key].desc) # rubocop: disable CodeReuse/ActiveRecord + else + items + end + end + end + + def order_list + strong_memoize(:order_list) do + OrderInfo.build_order_list(ordered_items) + end + end + + def arel_table + items.arel_table + end + + # Storing the current order values in the cursor allows us to + # make an intelligent decision on handling NULL values. + # Otherwise we would either need to fetch the record first, + # or fetch it in the SQL, significantly complicating it. + def encoded_json_from_ordering(node) + ordering = { 'id' => node[:id].to_s } + + order_list.each do |field| + field_name = field.attribute_name + ordering[field_name] = node[field_name].to_s + end + + encode(ordering.to_json) + end + + def ordering_from_encoded_json(cursor) + JSON.parse(decode(cursor)) + rescue JSON::ParserError + raise Gitlab::Graphql::Errors::ArgumentError, "Please provide a valid cursor" + end + end + end + end + end +end diff --git a/lib/gitlab/graphql/pagination/keyset/order_info.rb b/lib/gitlab/graphql/pagination/keyset/order_info.rb new file mode 100644 index 00000000000..876d6114f3c --- /dev/null +++ b/lib/gitlab/graphql/pagination/keyset/order_info.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +module Gitlab + module Graphql + module Pagination + module Keyset + class OrderInfo + attr_reader :attribute_name, :sort_direction, :named_function + + def initialize(order_value) + @attribute_name, @sort_direction, @named_function = + if order_value.is_a?(String) + extract_nulls_last_order(order_value) + else + extract_attribute_values(order_value) + end + end + + def operator_for(before_or_after) + case before_or_after + when :before + sort_direction == :asc ? '<' : '>' + when :after + sort_direction == :asc ? '>' : '<' + end + end + + # Only allow specific node types + def self.build_order_list(relation) + order_list = relation.order_values.select do |value| + supported_order_value?(value) + end + + order_list.map { |info| OrderInfo.new(info) } + end + + def self.validate_ordering(relation, order_list) + if order_list.empty? + raise ArgumentError.new('A minimum of 1 ordering field is required') + end + + if order_list.count > 2 + raise ArgumentError.new('A maximum of 2 ordering fields are allowed') + end + + # make sure the last ordering field is non-nullable + attribute_name = order_list.last&.attribute_name + + if relation.columns_hash[attribute_name].null + raise ArgumentError.new("Column `#{attribute_name}` must not allow NULL") + end + + if order_list.last.attribute_name != relation.primary_key + raise ArgumentError.new("Last ordering field must be the primary key, `#{relation.primary_key}`") + end + end + + def self.supported_order_value?(order_value) + return true if order_value.is_a?(Arel::Nodes::Ascending) || order_value.is_a?(Arel::Nodes::Descending) + return false unless order_value.is_a?(String) + + tokens = order_value.downcase.split + + tokens.last(2) == %w(nulls last) && tokens.count == 4 + end + + private + + def extract_nulls_last_order(order_value) + tokens = order_value.downcase.split + + [tokens.first, (tokens[1] == 'asc' ? :asc : :desc), nil] + end + + def extract_attribute_values(order_value) + named = nil + name = if ordering_by_lower?(order_value) + named = order_value.expr + named.expressions[0].name.to_s + else + order_value.expr.name + end + + [name, order_value.direction, named] + end + + # determine if ordering using LOWER, eg. "ORDER BY LOWER(boards.name)" + def ordering_by_lower?(order_value) + order_value.expr.is_a?(Arel::Nodes::NamedFunction) && order_value.expr&.name&.downcase == 'lower' + end + end + end + end + end +end diff --git a/lib/gitlab/graphql/pagination/keyset/query_builder.rb b/lib/gitlab/graphql/pagination/keyset/query_builder.rb new file mode 100644 index 00000000000..331981ce723 --- /dev/null +++ b/lib/gitlab/graphql/pagination/keyset/query_builder.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +module Gitlab + module Graphql + module Pagination + module Keyset + class QueryBuilder + def initialize(arel_table, order_list, decoded_cursor, before_or_after) + @arel_table, @order_list, @decoded_cursor, @before_or_after = arel_table, order_list, decoded_cursor, before_or_after + + if order_list.empty? + raise ArgumentError.new('No ordering scopes have been supplied') + end + end + + # Based on whether the main field we're ordering on is NULL in the + # cursor, we can more easily target our query condition. + # We assume that the last ordering field is unique, meaning + # it will not contain NULLs. + # We currently only support two ordering fields. + # + # Example of the conditions for + # relation: Issue.order(relative_position: :asc).order(id: :asc) + # after cursor: relative_position: 1500, id: 500 + # + # when cursor[relative_position] is not NULL + # + # ("issues"."relative_position" > 1500) + # OR ( + # "issues"."relative_position" = 1500 + # AND + # "issues"."id" > 500 + # ) + # OR ("issues"."relative_position" IS NULL) + # + # when cursor[relative_position] is NULL + # + # "issues"."relative_position" IS NULL + # AND + # "issues"."id" > 500 + # + def conditions + attr_values = order_list.map { |field| decoded_cursor[field.attribute_name] } + + if order_list.count == 1 && attr_values.first.nil? + raise Gitlab::Graphql::Errors::ArgumentError.new('Before/after cursor invalid: `nil` was provided as only sortable value') + end + + if order_list.count == 1 || attr_values.first.present? + Keyset::Conditions::NotNullCondition.new(arel_table, order_list, attr_values, operators, before_or_after).build + else + Keyset::Conditions::NullCondition.new(arel_table, order_list, attr_values, operators, before_or_after).build + end + end + + private + + attr_reader :arel_table, :order_list, :decoded_cursor, :before_or_after + + def operators + order_list.map { |field| field.operator_for(before_or_after) } + end + end + end + end + end +end diff --git a/lib/gitlab/import_export/group/tree_restorer.rb b/lib/gitlab/import_export/group/tree_restorer.rb index f6ebd83bfaa..323e6727a9f 100644 --- a/lib/gitlab/import_export/group/tree_restorer.rb +++ b/lib/gitlab/import_export/group/tree_restorer.rb @@ -20,6 +20,7 @@ module Gitlab def restore @group_attributes = relation_reader.consume_attributes(nil) @group_members = relation_reader.consume_relation(nil, 'members') + .map(&:first) # We need to remove `name` and `path` as we did consume it in previous pass @group_attributes.delete('name') diff --git a/lib/gitlab/import_export/json/legacy_reader.rb b/lib/gitlab/import_export/json/legacy_reader.rb index 57579fe9def..12d6458aedc 100644 --- a/lib/gitlab/import_export/json/legacy_reader.rb +++ b/lib/gitlab/import_export/json/legacy_reader.rb @@ -53,6 +53,7 @@ module Gitlab def initialize(relation_names:, allowed_path:) @relation_names = relation_names.map(&:to_s) + @consumed_relations = Set.new # This is legacy reader, to be used in transition # period before `.ndjson`, @@ -81,17 +82,19 @@ module Gitlab raise ArgumentError, "Invalid #{importable_name} passed to `consume_relation`. Use #{@allowed_path} instead." end - value = relations.delete(key) + Enumerator.new do |documents| + next unless @consumed_relations.add?("#{importable_path}/#{key}") - return value unless block_given? - return if value.nil? + value = relations.delete(key) + next if value.nil? - if value.is_a?(Array) - value.each.with_index do |item, idx| - yield(item, idx) + if value.is_a?(Array) + value.each.with_index do |item, idx| + documents << [item, idx] + end + else + documents << [value, 0] end - else - yield(value, 0) end end diff --git a/lib/gitlab/import_export/json/ndjson_reader.rb b/lib/gitlab/import_export/json/ndjson_reader.rb new file mode 100644 index 00000000000..e9b05afc7d4 --- /dev/null +++ b/lib/gitlab/import_export/json/ndjson_reader.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module Gitlab + module ImportExport + module JSON + class NdjsonReader + MAX_JSON_DOCUMENT_SIZE = 50.megabytes + + attr_reader :dir_path + + def initialize(dir_path) + @dir_path = dir_path + @consumed_relations = Set.new + end + + def exist? + Dir.exist?(@dir_path) + end + + # This can be removed once legacy_reader is deprecated. + def legacy? + false + end + + def consume_attributes(importable_path) + # This reads from `tree/project.json` + path = file_path("#{importable_path}.json") + data = File.read(path, MAX_JSON_DOCUMENT_SIZE) + json_decode(data) + end + + def consume_relation(importable_path, key) + Enumerator.new do |documents| + next unless @consumed_relations.add?("#{importable_path}/#{key}") + + # This reads from `tree/project/merge_requests.ndjson` + path = file_path(importable_path, "#{key}.ndjson") + next unless File.exist?(path) + + File.foreach(path, MAX_JSON_DOCUMENT_SIZE).with_index do |line, line_num| + documents << [json_decode(line), line_num] + end + end + end + + private + + def json_decode(string) + ActiveSupport::JSON.decode(string) + rescue ActiveSupport::JSON.parse_error => e + Gitlab::ErrorTracking.log_exception(e) + raise Gitlab::ImportExport::Error, 'Incorrect JSON format' + end + + def file_path(*path) + File.join(dir_path, *path) + end + end + end + end +end diff --git a/lib/gitlab/import_export/project/tree_restorer.rb b/lib/gitlab/import_export/project/tree_restorer.rb index 99e57d9decd..ad3720b56be 100644 --- a/lib/gitlab/import_export/project/tree_restorer.rb +++ b/lib/gitlab/import_export/project/tree_restorer.rb @@ -17,8 +17,13 @@ module Gitlab end def restore + unless relation_reader + raise Gitlab::ImportExport::Error, 'invalid import format' + end + @project_attributes = relation_reader.consume_attributes(importable_path) @project_members = relation_reader.consume_relation(importable_path, 'project_members') + .map(&:first) if relation_tree_restorer.restore import_failure_service.with_retry(action: 'set_latest_merge_request_diff_ids!') do @@ -38,14 +43,27 @@ module Gitlab def relation_reader strong_memoize(:relation_reader) do - ImportExport::JSON::LegacyReader::File.new( - File.join(shared.export_path, 'project.json'), - relation_names: reader.project_relation_names, - allowed_path: importable_path - ) + [ndjson_relation_reader, legacy_relation_reader] + .compact.find(&:exist?) end end + def ndjson_relation_reader + return unless Feature.enabled?(:project_import_ndjson, project.namespace) + + ImportExport::JSON::NdjsonReader.new( + File.join(shared.export_path, 'tree') + ) + end + + def legacy_relation_reader + ImportExport::JSON::LegacyReader::File.new( + File.join(shared.export_path, 'project.json'), + relation_names: reader.project_relation_names, + allowed_path: importable_path + ) + end + def relation_tree_restorer @relation_tree_restorer ||= RelationTreeRestorer.new( user: @user, diff --git a/lib/gitlab/import_export/relation_tree_restorer.rb b/lib/gitlab/import_export/relation_tree_restorer.rb index 78ed365cea0..056945d0294 100644 --- a/lib/gitlab/import_export/relation_tree_restorer.rb +++ b/lib/gitlab/import_export/relation_tree_restorer.rb @@ -67,7 +67,7 @@ module Gitlab end def process_relation!(relation_key, relation_definition) - @relation_reader.consume_relation(@importable_path, relation_key) do |data_hash, relation_index| + @relation_reader.consume_relation(@importable_path, relation_key).each do |data_hash, relation_index| process_relation_item!(relation_key, relation_definition, relation_index, data_hash) end end -- cgit v1.2.1