diff options
Diffstat (limited to 'lib/gitlab/usage/metrics')
15 files changed, 362 insertions, 63 deletions
diff --git a/lib/gitlab/usage/metrics/aggregates/aggregate.rb b/lib/gitlab/usage/metrics/aggregates/aggregate.rb index f77c8cab39c..4c40bfbc06f 100644 --- a/lib/gitlab/usage/metrics/aggregates/aggregate.rb +++ b/lib/gitlab/usage/metrics/aggregates/aggregate.rb @@ -83,7 +83,7 @@ module Gitlab when UNION_OF_AGGREGATED_METRICS source.calculate_metrics_union(metric_names: aggregation[:events], start_date: start_date, end_date: end_date, recorded_at: recorded_at) when INTERSECTION_OF_AGGREGATED_METRICS - calculate_metrics_intersections(source: source, metric_names: aggregation[:events], start_date: start_date, end_date: end_date) + source.calculate_metrics_intersections(metric_names: aggregation[:events], start_date: start_date, end_date: end_date, recorded_at: recorded_at) else Gitlab::ErrorTracking .track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}")) @@ -94,67 +94,6 @@ module Gitlab Gitlab::Utils::UsageData::FALLBACK end - # calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle - # this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391 - def calculate_metrics_intersections(source:, metric_names:, start_date:, end_date:, subset_powers_cache: Hash.new({})) - # calculate power of intersection of all given metrics from inclusion exclusion principle - # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) => - # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C| - # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| => - # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. 
+ |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D| - - # calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... - subset_powers_data = subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache) - - # calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D| - power_of_union_of_all_metrics = begin - subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||= \ - source.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at) - end - - # in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate, - # is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below - # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + |A & B & C| => - # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C| - # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| => - # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D| - subset_powers_size_even = subset_powers_data.size.even? - - # sum all components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... => - sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even) - - # add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D| - sum_of_all_subset_powers + (subset_powers_size_even ? 
power_of_union_of_all_metrics : -power_of_union_of_all_metrics) - end - - def sum_subset_powers(subset_powers_data, subset_powers_size_even) - sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index| - (index + 1).odd? ? value : -value - end - - (subset_powers_size_even ? -1 : 1) * sum_without_sign - end - - def subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache) - subset_sizes = (1...metric_names.size) - - subset_sizes.map do |subset_size| - if subset_size > 1 - # calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|) - metric_names.combination(subset_size).sum do |metrics_subset| - subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||= - calculate_metrics_intersections(source: source, metric_names: metrics_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache) - end - else - # calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ... 
# frozen_string_literal: true

module Gitlab
  module Usage
    module Metrics
      module Aggregates
        module Sources
          module Calculations
            # Derives the size ("power") of the intersection of several metrics from
            # union sizes only, using the inclusion-exclusion principle
            # (https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle).
            #
            # The host object must respond to
            # `calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:)`
            # and return a number; this module calls it with an implicit receiver.
            module Intersection
              # Returns |M1 & M2 & ... & Mn| for the given metrics.
              #
              # For three and four sets the principle reads:
              #   |A + B + C|     = (|A| + |B| + |C|) - (|A & B| + |A & C| + |B & C|) + |A & B & C|
              #   |A + B + C + D| = (|A| + ... + |D|) - (|A & B| + ... + |C & D|)
              #                     + (|A & B & C| + ... + |B & C & D|) - |A & B & C & D|
              # Solving for the last term expresses the full intersection through the
              # union of all metrics and the intersections of every smaller subset,
              # which are computed recursively.
              #
              # metric_names        - Array of metric names.
              # start_date/end_date - forwarded untouched to calculate_metrics_union.
              # recorded_at         - forwarded untouched to calculate_metrics_union.
              # subset_powers_cache - Hash of `subset size => { key => power }` shared by the
              #                       recursive calls so each subset is computed only once.
              #                       Buckets are created on demand, so a plain `{}` is accepted
              #                       as well as a Hash with a default.
              def calculate_metrics_intersections(
                metric_names:, start_date:, end_date:, recorded_at:,
                subset_powers_cache: Hash.new { |cache, subset_size| cache[subset_size] = {} }
              )
                # Every component of the equation except the union of all metrics:
                # [sum of |single|, sum of |pairs|, sum of |triples|, ...].
                subset_powers_data = subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, subset_powers_cache)

                # Last component of the equation: |M1 + M2 + ... + Mn|.
                power_of_union_of_all_metrics =
                  powers_for_size(subset_powers_cache, metric_names.size)[metric_names.join('_+_')] ||=
                    calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)

                # The sign the intersection carries in the equation alternates with the
                # number of metrics, so after moving it to the left-hand side the signs of
                # all other components depend on whether the number of subset sizes is even.
                subset_powers_size_even = subset_powers_data.size.even?

                sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)

                sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics)
              end

              private

              # Returns the cache bucket for +subset_size+, creating it when missing.
              #
              # A cache built as `Hash.new({})` hands out one shared default object, which is
              # truthy, so no bucket is assigned and such caches behave exactly as before.
              def powers_for_size(subset_powers_cache, subset_size)
                subset_powers_cache[subset_size] ||= {}
              end

              # For each subset size from 1 to n - 1 returns the sum of the intersection
              # powers of all subsets of that size, ordered by size.
              def subsets_intersection_powers(metric_names, start_date, end_date, recorded_at, subset_powers_cache)
                (1...metric_names.size).map do |subset_size|
                  cached_powers = powers_for_size(subset_powers_cache, subset_size)

                  if subset_size > 1
                    # ... (|A & B| + |A & C| + ... + |C & D|) ...
                    metric_names.combination(subset_size).sum do |metrics_subset|
                      cached_powers[metrics_subset.join('_&_')] ||=
                        calculate_metrics_intersections(metric_names: metrics_subset, start_date: start_date, end_date: end_date, recorded_at: recorded_at, subset_powers_cache: subset_powers_cache)
                    end
                  else
                    # (|A| + |B| + |C| + |D|) ...
                    # NOTE(review): a single name, not an Array, is passed as metric_names here;
                    # this is kept as-is and assumes calculate_metrics_union accepts both.
                    metric_names.sum do |metric|
                      cached_powers[metric] ||=
                        calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
                    end
                  end
                end
              end

              # Sums the per-size components with alternating signs (+, -, +, ...) and then
              # flips the whole sum when the number of components is even.
              def sum_subset_powers(subset_powers_data, subset_powers_size_even)
                sum_without_sign = subset_powers_data.each_with_index.sum do |value, index|
                  index.even? ? value : -value
                end

                (subset_powers_size_even ? -1 : 1) * sum_without_sign
              end
            end
          end
        end
      end
    end
  end
end
# frozen_string_literal: true

# lib/gitlab/usage/metrics/instrumentations/base_metric.rb
module Gitlab
  module Usage
    module Metrics
      module Instrumentations
        # Shared parent of the instrumentation classes: stores the time frame a
        # metric is computed for and mixes in the Gitlab::Utils::UsageData helpers
        # as instance methods.
        class BaseMetric
          include Gitlab::Utils::UsageData

          # Time frame identifier given at construction; subclasses in this
          # directory compare it against strings such as '28d', '7d', 'all', 'none'.
          attr_reader :time_frame

          # time_frame - stored as-is; no validation happens here.
          def initialize(time_frame:)
            @time_frame = time_frame
          end
        end
      end
    end
  end
end

# lib/gitlab/usage/metrics/instrumentations/count_boards_metric.rb
module Gitlab
  module Usage
    module Metrics
      module Instrumentations
        # Database metric declared with the :count operation over the Board relation.
        class CountBoardsMetric < DatabaseMetric
          operation :count

          relation { Board }
        end
      end
    end
  end
end
# frozen_string_literal: true

module Gitlab
  module Usage
    module Metrics
      module Instrumentations
        # lib/gitlab/usage/metrics/instrumentations/database_metric.rb
        #
        # Declarative base class for metrics computed from a database relation.
        #
        # Usage example
        #
        #   class CountUsersCreatingIssuesMetric < DatabaseMetric
        #     operation :distinct_count, column: :author_id
        #
        #     relation { Issue }
        #   end
        #
        # The relation block is called without arguments; the time constraints for
        # the metric's time frame are applied by #relation afterwards.
        class DatabaseMetric < BaseMetric
          class << self
            attr_reader :metric_operation, :metric_relation, :metric_start, :metric_finish, :column
          end

          # Registers a block whose result is passed as `start:` to the operation.
          def self.start(&block)
            @metric_start = block
          end

          # Registers a block whose result is passed as `finish:` to the operation.
          def self.finish(&block)
            @metric_finish = block
          end

          # Registers the block that returns the base relation.
          def self.relation(&block)
            @metric_relation = block
          end

          # Declares the name of the instance method used to compute the value
          # and, optionally, the column handed to it.
          def self.operation(symbol, column: nil)
            @metric_operation = symbol
            @column = column
          end

          # Looks up the declared operation on this instance and calls it with the
          # constrained relation, the column and the optional start/finish results.
          def value
            metric = self.class
            calculation = method(metric.metric_operation)

            calculation.call(
              relation,
              metric.column,
              start: metric.metric_start&.call,
              finish: metric.metric_finish&.call
            )
          end

          # Base relation narrowed by the constraints of the current time frame.
          def relation
            base_relation = self.class.metric_relation.call

            base_relation.where(time_constraints)
          end

          private

          # Maps the time frame onto the argument given to `where`.
          # Raises for any time frame other than '28d', 'all' and 'none'.
          def time_constraints
            case time_frame
            when '28d' then { created_at: 30.days.ago..2.days.ago }
            when 'all' then {}
            when 'none' then nil
            else raise "Unknown time frame: #{time_frame} for DatabaseMetric"
            end
          end
        end

        # lib/gitlab/usage/metrics/instrumentations/generic_metric.rb
        #
        # Base class for metrics whose value comes from an arbitrary block.
        #
        # Usage example
        #
        #   class UuidMetric < GenericMetric
        #     value do
        #       Gitlab::CurrentSettings.uuid
        #     end
        #   end
        class GenericMetric < BaseMetric
          class << self
            attr_reader :metric_value

            # Registers the block that produces the metric value.
            def value(&block)
              @metric_value = block
            end
          end

          # Runs the registered block inside alt_usage_data.
          def value
            alt_usage_data { self.class.metric_value.call }
          end
        end

        # lib/gitlab/usage/metrics/instrumentations/count_issues_metric.rb
        #
        # Counts Issue records, bounding the operation by the minimum and maximum issue id.
        class CountIssuesMetric < DatabaseMetric
          operation :count

          start { Issue.minimum(:id) }
          finish { Issue.maximum(:id) }

          relation { Issue }
        end

        # lib/gitlab/usage/metrics/instrumentations/count_users_creating_issues_metric.rb
        #
        # Distinct count of Issue author_id values.
        class CountUsersCreatingIssuesMetric < DatabaseMetric
          operation :distinct_count, column: :author_id

          relation { Issue }
        end

        # lib/gitlab/usage/metrics/instrumentations/count_users_using_approve_quick_action_metric.rb
        #
        # Redis HLL metric over the :i_quickactions_approve event.
        class CountUsersUsingApproveQuickActionMetric < RedisHLLMetric
          event_names :i_quickactions_approve
        end
      end
    end
  end
end
# frozen_string_literal: true

module Gitlab
  module Usage
    module Metrics
      module Instrumentations
        # lib/gitlab/usage/metrics/instrumentations/hostname_metric.rb
        #
        # Reports the host read from Gitlab.config.gitlab.
        class HostnameMetric < GenericMetric
          value { Gitlab.config.gitlab.host }
        end

        # lib/gitlab/usage/metrics/instrumentations/redis_hll_metric.rb
        #
        # Base class for metrics read through HLLRedisCounter.unique_events.
        #
        # Usage example
        #
        #   class CountUsersVisitingAnalyticsValuestreamMetric < RedisHLLMetric
        #     event_names :g_analytics_valuestream
        #   end
        class RedisHLLMetric < BaseMetric
          class << self
            attr_reader :metric_events

            # Declares the event name(s) handed to the counter as `event_names:`.
            def event_names(events = nil)
              @metric_events = events
            end
          end

          # Asks the counter for the unique events of the declared event names
          # within the window of the current time frame, inside redis_usage_data.
          def value
            redis_usage_data do
              Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(
                **time_constraints,
                event_names: self.class.metric_events
              )
            end
          end

          private

          # Date window for the time frame; both supported frames end today.
          # Raises for any time frame other than '28d' and '7d'.
          def time_constraints
            start_date =
              case time_frame
              when '28d' then 4.weeks.ago.to_date
              when '7d' then 7.days.ago.to_date
              else raise "Unknown time frame: #{time_frame} for TimeConstraint"
              end

            { start_date: start_date, end_date: Date.current }
          end
        end

        # lib/gitlab/usage/metrics/instrumentations/uuid_metric.rb
        #
        # Reports the uuid read from Gitlab::CurrentSettings.
        class UuidMetric < GenericMetric
          value { Gitlab::CurrentSettings.uuid }
        end
      end
    end
  end
end
# frozen_string_literal: true

module Gitlab
  module Usage
    module Metrics
      # Expands a dot-separated key path into the nested Hash it describes.
      class KeyPathProcessor
        class << self
          # Builds a nested Hash with symbol keys, one level per path segment,
          # holding +value+ at the innermost key.
          #
          #   process('counts.issues', 5) # => { counts: { issues: 5 } }
          #
          # key_path - String of segments separated by '.'.
          # value    - any object; stored untouched.
          #
          # Raises ArgumentError when the path has no segments ('' or '.');
          # previously this surfaced as a NoMethodError on nil.
          def process(key_path, value)
            unflatten(key_path.split('.'), value)
          end

          private

          # Wraps +value+ from the innermost key outwards without mutating +keys+.
          def unflatten(keys, value)
            raise ArgumentError, 'key path must contain at least one key' if keys.empty?

            keys.reverse_each.reduce(value) { |nested, key| { key.to_sym => nested } }
          end
        end
      end
    end
  end
end