diff options
Diffstat (limited to 'lib/gitlab/usage/metrics')
10 files changed, 340 insertions, 214 deletions
diff --git a/lib/gitlab/usage/metrics/aggregates/aggregate.rb b/lib/gitlab/usage/metrics/aggregates/aggregate.rb index 4c40bfbc06f..3ec06fba5d1 100644 --- a/lib/gitlab/usage/metrics/aggregates/aggregate.rb +++ b/lib/gitlab/usage/metrics/aggregates/aggregate.rb @@ -22,9 +22,7 @@ module Gitlab }.freeze class Aggregate - delegate :weekly_time_range, - :monthly_time_range, - to: Gitlab::UsageDataCounters::HLLRedisCounter + include Gitlab::Usage::TimeFrame def initialize(recorded_at) @aggregated_metrics = load_metrics(AGGREGATED_METRICS_PATH) @@ -32,15 +30,15 @@ module Gitlab end def all_time_data - aggregated_metrics_data(start_date: nil, end_date: nil, time_frame: Gitlab::Utils::UsageData::ALL_TIME_TIME_FRAME_NAME) + aggregated_metrics_data(start_date: nil, end_date: nil, time_frame: Gitlab::Usage::TimeFrame::ALL_TIME_TIME_FRAME_NAME) end def monthly_data - aggregated_metrics_data(**monthly_time_range.merge(time_frame: Gitlab::Utils::UsageData::TWENTY_EIGHT_DAYS_TIME_FRAME_NAME)) + aggregated_metrics_data(**monthly_time_range.merge(time_frame: Gitlab::Usage::TimeFrame::TWENTY_EIGHT_DAYS_TIME_FRAME_NAME)) end def weekly_data - aggregated_metrics_data(**weekly_time_range.merge(time_frame: Gitlab::Utils::UsageData::SEVEN_DAYS_TIME_FRAME_NAME)) + aggregated_metrics_data(**weekly_time_range.merge(time_frame: Gitlab::Usage::TimeFrame::SEVEN_DAYS_TIME_FRAME_NAME)) end private @@ -54,7 +52,7 @@ module Gitlab case aggregation[:source] when REDIS_SOURCE - if time_frame == Gitlab::Utils::UsageData::ALL_TIME_TIME_FRAME_NAME + if time_frame == Gitlab::Usage::TimeFrame::ALL_TIME_TIME_FRAME_NAME data[aggregation[:name]] = Gitlab::Utils::UsageData::FALLBACK Gitlab::ErrorTracking .track_and_raise_for_dev_exception( @@ -64,8 +62,6 @@ module Gitlab data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date) end when DATABASE_SOURCE - next unless Feature.enabled?('database_sourced_aggregated_metrics', default_enabled: false, type: :development) - data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date) else Gitlab::ErrorTracking diff --git a/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb b/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb index 3069afab147..eccf79b9703 100644 --- a/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb +++ b/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb @@ -56,15 +56,15 @@ module Gitlab end def time_period_to_human_name(time_period) - return Gitlab::Utils::UsageData::ALL_TIME_TIME_FRAME_NAME if time_period.blank? + return Gitlab::Usage::TimeFrame::ALL_TIME_TIME_FRAME_NAME if time_period.blank? start_date = time_period.first.to_date end_date = time_period.last.to_date if (end_date - start_date).to_i > 7 - Gitlab::Utils::UsageData::TWENTY_EIGHT_DAYS_TIME_FRAME_NAME + Gitlab::Usage::TimeFrame::TWENTY_EIGHT_DAYS_TIME_FRAME_NAME else - Gitlab::Utils::UsageData::SEVEN_DAYS_TIME_FRAME_NAME + Gitlab::Usage::TimeFrame::SEVEN_DAYS_TIME_FRAME_NAME end end end diff --git a/lib/gitlab/usage/metrics/instrumentations/base_metric.rb b/lib/gitlab/usage/metrics/instrumentations/base_metric.rb index 29b44f2bd0a..7b5bee3f8bd 100644 --- a/lib/gitlab/usage/metrics/instrumentations/base_metric.rb +++ b/lib/gitlab/usage/metrics/instrumentations/base_metric.rb @@ -6,11 +6,14 @@ module Gitlab module Instrumentations class BaseMetric include Gitlab::Utils::UsageData + include Gitlab::Usage::TimeFrame attr_reader :time_frame + attr_reader :options - def initialize(time_frame:) + def initialize(time_frame:, options: {}) @time_frame = time_frame + @options = options end end end diff --git a/lib/gitlab/usage/metrics/instrumentations/count_users_using_approve_quick_action_metric.rb b/lib/gitlab/usage/metrics/instrumentations/count_users_using_approve_quick_action_metric.rb deleted file mode 100644 index 9c92f2e9595..00000000000 --- a/lib/gitlab/usage/metrics/instrumentations/count_users_using_approve_quick_action_metric.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Usage - module Metrics - module Instrumentations - class CountUsersUsingApproveQuickActionMetric < RedisHLLMetric - event_names :i_quickactions_approve - end - end - end - end -end diff --git a/lib/gitlab/usage/metrics/instrumentations/database_metric.rb b/lib/gitlab/usage/metrics/instrumentations/database_metric.rb index f83f90dea03..69a288e5b6e 100644 --- a/lib/gitlab/usage/metrics/instrumentations/database_metric.rb +++ b/lib/gitlab/usage/metrics/instrumentations/database_metric.rb @@ -43,16 +43,28 @@ module Gitlab finish: self.class.metric_finish&.call) end - def relation - self.class.metric_relation.call.where(time_constraints) + def to_sql + Gitlab::Usage::Metrics::Query.for(self.class.metric_operation, relation, self.class.column) + end + + def suggested_name + Gitlab::Usage::Metrics::NameSuggestion.for( + self.class.metric_operation, + relation: relation, + column: self.class.column + ) end private + def relation + self.class.metric_relation.call.where(time_constraints) + end + def time_constraints case time_frame when '28d' - { created_at: 30.days.ago..2.days.ago } + monthly_time_range_db_params when 'all' {} when 'none' diff --git a/lib/gitlab/usage/metrics/instrumentations/generic_metric.rb b/lib/gitlab/usage/metrics/instrumentations/generic_metric.rb index 7c97cc37d17..1849773e33d 100644 --- a/lib/gitlab/usage/metrics/instrumentations/generic_metric.rb +++ b/lib/gitlab/usage/metrics/instrumentations/generic_metric.rb @@ -13,6 +13,9 @@ module Gitlab # end # end class << self + attr_reader :metric_operation + @metric_operation = :alt + def value(&block) @metric_value = block end @@ -25,6 +28,12 @@ module Gitlab self.class.metric_value.call end end + + def suggested_name + Gitlab::Usage::Metrics::NameSuggestion.for( + self.class.metric_operation + ) + end end end end diff --git a/lib/gitlab/usage/metrics/instrumentations/redis_hll_metric.rb b/lib/gitlab/usage/metrics/instrumentations/redis_hll_metric.rb index 140d56f0d42..a36e612a1cb 100644 --- a/lib/gitlab/usage/metrics/instrumentations/redis_hll_metric.rb +++ b/lib/gitlab/usage/metrics/instrumentations/redis_hll_metric.rb @@ -7,35 +7,50 @@ module Gitlab class RedisHLLMetric < BaseMetric # Usage example # - # class CountUsersVisitingAnalyticsValuestreamMetric < RedisHLLMetric - # event_names :g_analytics_valuestream + # In metric YAML defintion + # instrumentation_class: RedisHLLMetric + # events: + # - g_analytics_valuestream # end class << self - def event_names(events = nil) - @metric_events = events - end + attr_reader :metric_operation + @metric_operation = :redis + end - attr_reader :metric_events + def initialize(time_frame:, options: {}) + super + + raise ArgumentError, "options events are required" unless metric_events.present? + end + + def metric_events + options[:events] end def value redis_usage_data do - event_params = time_constraints.merge(event_names: self.class.metric_events) + event_params = time_constraints.merge(event_names: metric_events) Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(**event_params) end end + def suggested_name + Gitlab::Usage::Metrics::NameSuggestion.for( + self.class.metric_operation + ) + end + private def time_constraints case time_frame when '28d' - { start_date: 4.weeks.ago.to_date, end_date: Date.current } + monthly_time_range when '7d' - { start_date: 7.days.ago.to_date, end_date: Date.current } + weekly_time_range else - raise "Unknown time frame: #{time_frame} for TimeConstraint" + raise "Unknown time frame: #{time_frame} for RedisHLLMetric" end end end diff --git a/lib/gitlab/usage/metrics/name_suggestion.rb b/lib/gitlab/usage/metrics/name_suggestion.rb new file mode 100644 index 00000000000..0728af9e2ca --- /dev/null +++ b/lib/gitlab/usage/metrics/name_suggestion.rb @@ -0,0 +1,200 @@ +# frozen_string_literal: true + +module Gitlab + module Usage + module Metrics + class NameSuggestion + FREE_TEXT_METRIC_NAME = "<please fill metric name>" + REDIS_EVENT_METRIC_NAME = "<please fill metric name, suggested format is: {subject}_{verb}{ing|ed}_{object} eg: users_creating_epics or merge_requests_viewed_in_single_file_mode>" + CONSTRAINTS_PROMPT_TEMPLATE = "<adjective describing: '%{constraints}'>" + + class << self + def for(operation, relation: nil, column: nil) + case operation + when :count + name_suggestion(column: column, relation: relation, prefix: 'count') + when :distinct_count + name_suggestion(column: column, relation: relation, prefix: 'count_distinct', distinct: :distinct) + when :estimate_batch_distinct_count + name_suggestion(column: column, relation: relation, prefix: 'estimate_distinct_count') + when :sum + name_suggestion(column: column, relation: relation, prefix: 'sum') + when :redis + REDIS_EVENT_METRIC_NAME + when :alt + FREE_TEXT_METRIC_NAME + else + raise ArgumentError, "#{operation} operation not supported" + end + end + + private + + def name_suggestion(relation:, column: nil, prefix: nil, distinct: nil) + # rubocop: disable CodeReuse/ActiveRecord + relation = relation.unscope(where: :created_at) + # rubocop: enable CodeReuse/ActiveRecord + + parts = [prefix] + arel_column = arelize_column(relation, column) + + # nil as column indicates that the counting would use fallback value of primary key. + # Because counting primary key from relation is the conceptual equal to counting all + # records from given relation, in order to keep name suggestion more condensed + # primary key column is skipped. + # eg: SELECT COUNT(id) FROM issues would translate as count_issues and not + # as count_id_from_issues since it does not add more information to the name suggestion + if arel_column != Arel::Table.new(relation.table_name)[relation.primary_key] + parts << arel_column.name + parts << 'from' + end + + arel = arel_query(relation: relation, column: arel_column, distinct: distinct) + constraints = parse_constraints(relation: relation, arel: arel) + + # In some cases due to performance reasons metrics are instrumented with joined relations + # where relation listed in FROM statement is not the one that includes counted attribute + # in such situations to make name suggestion more intuitive source should be inferred based + # on the relation that provide counted attribute + # EG: SELECT COUNT(deployments.environment_id) FROM clusters + # JOIN deployments ON deployments.cluster_id = cluster.id + # should be translated into: + # count_environment_id_from_deployments_with_clusters + # instead of + # count_environment_id_from_clusters_with_deployments + actual_source = parse_source(relation, arel_column) + + append_constraints_prompt(actual_source, [constraints], parts) + + parts << actual_source + parts += process_joined_relations(actual_source, arel, relation, constraints) + parts.compact.join('_').delete('"') + end + + def append_constraints_prompt(target, constraints, parts) + applicable_constraints = constraints.select { |constraint| constraint.include?(target) } + return unless applicable_constraints.any? + + parts << CONSTRAINTS_PROMPT_TEMPLATE % { constraints: applicable_constraints.join(' AND ') } + end + + def parse_constraints(relation:, arel:) + connection = relation.connection + ::Gitlab::Usage::Metrics::NamesSuggestions::RelationParsers::Constraints + .new(connection) + .accept(arel, collector(connection)) + .value + end + + # TODO: joins with `USING` keyword + def process_joined_relations(actual_source, arel, relation, where_constraints) + joins = parse_joins(connection: relation.connection, arel: arel) + return [] unless joins.any? + + sources = [relation.table_name, *joins.map { |join| join[:source] }] + joins = extract_joins_targets(joins, sources) + + relations = if actual_source != relation.table_name + build_relations_tree(joins + [{ source: relation.table_name }], actual_source) + else + # in case where counter attribute comes from joined relations, the relations + # diagram has to be built bottom up, thus source and target are reverted + build_relations_tree(joins + [{ source: relation.table_name }], actual_source, source_key: :target, target_key: :source) + end + + collect_join_parts(relations: relations[actual_source], joins: joins, wheres: where_constraints) + end + + def parse_joins(connection:, arel:) + ::Gitlab::Usage::Metrics::NamesSuggestions::RelationParsers::Joins + .new(connection) + .accept(arel) + end + + def extract_joins_targets(joins, sources) + joins.map do |join| + source_regex = /(#{join[:source]})\.(\w+_)*id/i + + tables_except_src = (sources - [join[:source]]).join('|') + target_regex = /(?<target>#{tables_except_src})\.(\w+_)*id/i + + join_cond_regex = /(#{source_regex}\s+=\s+#{target_regex})|(#{target_regex}\s+=\s+#{source_regex})/i + matched = join_cond_regex.match(join[:constraints]) + + if matched + join[:target] = matched[:target] + join[:constraints].gsub!(/#{join_cond_regex}(\s+(and|or))*/i, '') + end + + join + end + end + + def build_relations_tree(joins, parent, source_key: :source, target_key: :target) + return [] if joins.blank? + + tree = {} + tree[parent] = [] + + joins.each do |join| + if join[source_key] == parent + tree[parent] << build_relations_tree(joins - [join], join[target_key], source_key: source_key, target_key: target_key) + end + end + tree + end + + def collect_join_parts(relations:, joins:, wheres:, parts: [], conjunctions: %w[with having including].cycle) + conjunction = conjunctions.next + relations.each do |subtree| + subtree.each do |parent, children| + parts << "<#{conjunction}>" + join_constraints = joins.find { |join| join[:source] == parent }&.dig(:constraints) + append_constraints_prompt(parent, [wheres, join_constraints].compact, parts) + parts << parent + collect_join_parts(relations: children, joins: joins, wheres: wheres, parts: parts, conjunctions: conjunctions) + end + end + parts + end + + def arelize_column(relation, column) + case column + when Arel::Attribute + column + when NilClass + Arel::Table.new(relation.table_name)[relation.primary_key] + when String + if column.include?('.') + table, col = column.split('.') + Arel::Table.new(table)[col] + else + Arel::Table.new(relation.table_name)[column] + end + when Symbol + arelize_column(relation, column.to_s) + end + end + + def parse_source(relation, column) + column.relation.name || relation.table_name + end + + def collector(connection) + Arel::Collectors::SubstituteBinds.new(connection, Arel::Collectors::SQLString.new) + end + + def arel_query(relation:, column: nil, distinct: nil) + column ||= relation.primary_key + + if column.is_a?(Arel::Attribute) + relation.select(column.count(distinct)).arel + else + relation.select(relation.all.table[column].count(distinct)).arel + end + end + end + end + end + end +end diff --git a/lib/gitlab/usage/metrics/names_suggestions/generator.rb b/lib/gitlab/usage/metrics/names_suggestions/generator.rb index 49581169452..a669b43f395 100644 --- a/lib/gitlab/usage/metrics/names_suggestions/generator.rb +++ b/lib/gitlab/usage/metrics/names_suggestions/generator.rb @@ -5,10 +5,6 @@ module Gitlab module Metrics module NamesSuggestions class Generator < ::Gitlab::UsageData - FREE_TEXT_METRIC_NAME = "<please fill metric name>" - REDIS_EVENT_METRIC_NAME = "<please fill metric name, suggested format is: {subject}_{verb}{ing|ed}_{object} eg: users_creating_epics or merge_requests_viewed_in_single_file_mode>" - CONSTRAINTS_PROMPT_TEMPLATE = "<adjective describing: '%{constraints}'>" - class << self def generate(key_path) uncached_data.deep_stringify_keys.dig(*key_path.split('.')) @@ -17,200 +13,36 @@ module Gitlab private def count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil) - name_suggestion(column: column, relation: relation, prefix: 'count') + Gitlab::Usage::Metrics::NameSuggestion.for(:count, column: column, relation: relation) end def distinct_count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil) - name_suggestion(column: column, relation: relation, prefix: 'count_distinct', distinct: :distinct) + Gitlab::Usage::Metrics::NameSuggestion.for(:distinct_count, column: column, relation: relation) end def redis_usage_counter - REDIS_EVENT_METRIC_NAME + Gitlab::Usage::Metrics::NameSuggestion.for(:redis) end def alt_usage_data(*) - FREE_TEXT_METRIC_NAME + Gitlab::Usage::Metrics::NameSuggestion.for(:alt) end def redis_usage_data_totals(counter) - counter.fallback_totals.transform_values { |_| REDIS_EVENT_METRIC_NAME } + counter.fallback_totals.transform_values { |_| Gitlab::Usage::Metrics::NameSuggestion.for(:redis) } end def sum(relation, column, *rest) - name_suggestion(column: column, relation: relation, prefix: 'sum') + Gitlab::Usage::Metrics::NameSuggestion.for(:sum, column: column, relation: relation) end def estimate_batch_distinct_count(relation, column = nil, *rest) - name_suggestion(column: column, relation: relation, prefix: 'estimate_distinct_count') + Gitlab::Usage::Metrics::NameSuggestion.for(:estimate_batch_distinct_count, column: column, relation: relation) end def add(*args) "add_#{args.join('_and_')}" end - - def name_suggestion(relation:, column: nil, prefix: nil, distinct: nil) - # rubocop: disable CodeReuse/ActiveRecord - relation = relation.unscope(where: :created_at) - # rubocop: enable CodeReuse/ActiveRecord - - parts = [prefix] - arel_column = arelize_column(relation, column) - - # nil as column indicates that the counting would use fallback value of primary key. - # Because counting primary key from relation is the conceptual equal to counting all - # records from given relation, in order to keep name suggestion more condensed - # primary key column is skipped. - # eg: SELECT COUNT(id) FROM issues would translate as count_issues and not - # as count_id_from_issues since it does not add more information to the name suggestion - if arel_column != Arel::Table.new(relation.table_name)[relation.primary_key] - parts << arel_column.name - parts << 'from' - end - - arel = arel_query(relation: relation, column: arel_column, distinct: distinct) - constraints = parse_constraints(relation: relation, arel: arel) - - # In some cases due to performance reasons metrics are instrumented with joined relations - # where relation listed in FROM statement is not the one that includes counted attribute - # in such situations to make name suggestion more intuitive source should be inferred based - # on the relation that provide counted attribute - # EG: SELECT COUNT(deployments.environment_id) FROM clusters - # JOIN deployments ON deployments.cluster_id = cluster.id - # should be translated into: - # count_environment_id_from_deployments_with_clusters - # instead of - # count_environment_id_from_clusters_with_deployments - actual_source = parse_source(relation, arel_column) - - append_constraints_prompt(actual_source, [constraints], parts) - - parts << actual_source - parts += process_joined_relations(actual_source, arel, relation, constraints) - parts.compact.join('_').delete('"') - end - - def append_constraints_prompt(target, constraints, parts) - applicable_constraints = constraints.select { |constraint| constraint.include?(target) } - return unless applicable_constraints.any? - - parts << CONSTRAINTS_PROMPT_TEMPLATE % { constraints: applicable_constraints.join(' AND ') } - end - - def parse_constraints(relation:, arel:) - connection = relation.connection - ::Gitlab::Usage::Metrics::NamesSuggestions::RelationParsers::Constraints - .new(connection) - .accept(arel, collector(connection)) - .value - end - - # TODO: joins with `USING` keyword - def process_joined_relations(actual_source, arel, relation, where_constraints) - joins = parse_joins(connection: relation.connection, arel: arel) - return [] unless joins.any? - - sources = [relation.table_name, *joins.map { |join| join[:source] }] - joins = extract_joins_targets(joins, sources) - - relations = if actual_source != relation.table_name - build_relations_tree(joins + [{ source: relation.table_name }], actual_source) - else - # in case where counter attribute comes from joined relations, the relations - # diagram has to be built bottom up, thus source and target are reverted - build_relations_tree(joins + [{ source: relation.table_name }], actual_source, source_key: :target, target_key: :source) - end - - collect_join_parts(relations: relations[actual_source], joins: joins, wheres: where_constraints) - end - - def parse_joins(connection:, arel:) - ::Gitlab::Usage::Metrics::NamesSuggestions::RelationParsers::Joins - .new(connection) - .accept(arel) - end - - def extract_joins_targets(joins, sources) - joins.map do |join| - source_regex = /(#{join[:source]})\.(\w+_)*id/i - - tables_except_src = (sources - [join[:source]]).join('|') - target_regex = /(?<target>#{tables_except_src})\.(\w+_)*id/i - - join_cond_regex = /(#{source_regex}\s+=\s+#{target_regex})|(#{target_regex}\s+=\s+#{source_regex})/i - matched = join_cond_regex.match(join[:constraints]) - - if matched - join[:target] = matched[:target] - join[:constraints].gsub!(/#{join_cond_regex}(\s+(and|or))*/i, '') - end - - join - end - end - - def build_relations_tree(joins, parent, source_key: :source, target_key: :target) - return [] if joins.blank? - - tree = {} - tree[parent] = [] - - joins.each do |join| - if join[source_key] == parent - tree[parent] << build_relations_tree(joins - [join], join[target_key], source_key: source_key, target_key: target_key) - end - end - tree - end - - def collect_join_parts(relations:, joins:, wheres:, parts: [], conjunctions: %w[with having including].cycle) - conjunction = conjunctions.next - relations.each do |subtree| - subtree.each do |parent, children| - parts << "<#{conjunction}>" - join_constraints = joins.find { |join| join[:source] == parent }&.dig(:constraints) - append_constraints_prompt(parent, [wheres, join_constraints].compact, parts) - parts << parent - collect_join_parts(relations: children, joins: joins, wheres: wheres, parts: parts, conjunctions: conjunctions) - end - end - parts - end - - def arelize_column(relation, column) - case column - when Arel::Attribute - column - when NilClass - Arel::Table.new(relation.table_name)[relation.primary_key] - when String - if column.include?('.') - table, col = column.split('.') - Arel::Table.new(table)[col] - else - Arel::Table.new(relation.table_name)[column] - end - when Symbol - arelize_column(relation, column.to_s) - end - end - - def parse_source(relation, column) - column.relation.name || relation.table_name - end - - def collector(connection) - Arel::Collectors::SubstituteBinds.new(connection, Arel::Collectors::SQLString.new) - end - - def arel_query(relation:, column: nil, distinct: nil) - column ||= relation.primary_key - - if column.is_a?(Arel::Attribute) - relation.select(column.count(distinct)).arel - else - relation.select(relation.all.table[column].count(distinct)).arel - end - end end end end diff --git a/lib/gitlab/usage/metrics/query.rb b/lib/gitlab/usage/metrics/query.rb new file mode 100644 index 00000000000..f6947c4c8ff --- /dev/null +++ b/lib/gitlab/usage/metrics/query.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +module Gitlab + module Usage + module Metrics + class Query + class << self + def for(operation, relation, column = nil, **extra) + case operation + when :count + count(relation, column) + when :distinct_count + distinct_count(relation, column) + when :sum + sum(relation, column) + when :estimate_batch_distinct_count + estimate_batch_distinct_count(relation, column) + when :histogram + histogram(relation, column, **extra) + else + raise ArgumentError, "#{operation} operation not supported" + end + end + + private + + def count(relation, column = nil) + raw_sql(relation, column) + end + + def distinct_count(relation, column = nil) + raw_sql(relation, column, true) + end + + def sum(relation, column) + relation.select(relation.all.table[column].sum).to_sql + end + + def estimate_batch_distinct_count(relation, column = nil) + raw_sql(relation, column, true) + end + + # rubocop: disable CodeReuse/ActiveRecord + def histogram(relation, column, buckets:, bucket_size: buckets.size) + count_grouped = relation.group(column).select(Arel.star.count.as('count_grouped')) + cte = Gitlab::SQL::CTE.new(:count_cte, count_grouped) + + bucket_segments = bucket_size - 1 + width_bucket = Arel::Nodes::NamedFunction + .new('WIDTH_BUCKET', [cte.table[:count_grouped], buckets.first, buckets.last, bucket_segments]) + .as('buckets') + + query = cte + .table + .project(width_bucket, cte.table[:count]) + .group('buckets') + .order('buckets') + .with(cte.to_arel) + + query.to_sql + end + # rubocop: enable CodeReuse/ActiveRecord + + def raw_sql(relation, column, distinct = false) + column ||= relation.primary_key + relation.select(relation.all.table[column].count(distinct)).to_sql + end + end + end + end + end +end |