summaryrefslogtreecommitdiff
path: root/lib/gitlab/usage/metrics/aggregates/aggregate.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/usage/metrics/aggregates/aggregate.rb')
-rw-r--r--lib/gitlab/usage/metrics/aggregates/aggregate.rb65
1 files changed, 2 insertions, 63 deletions
diff --git a/lib/gitlab/usage/metrics/aggregates/aggregate.rb b/lib/gitlab/usage/metrics/aggregates/aggregate.rb
index f77c8cab39c..4c40bfbc06f 100644
--- a/lib/gitlab/usage/metrics/aggregates/aggregate.rb
+++ b/lib/gitlab/usage/metrics/aggregates/aggregate.rb
@@ -83,7 +83,7 @@ module Gitlab
when UNION_OF_AGGREGATED_METRICS
source.calculate_metrics_union(metric_names: aggregation[:events], start_date: start_date, end_date: end_date, recorded_at: recorded_at)
when INTERSECTION_OF_AGGREGATED_METRICS
- calculate_metrics_intersections(source: source, metric_names: aggregation[:events], start_date: start_date, end_date: end_date)
+ source.calculate_metrics_intersections(metric_names: aggregation[:events], start_date: start_date, end_date: end_date, recorded_at: recorded_at)
else
Gitlab::ErrorTracking
.track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"))
@@ -94,67 +94,6 @@ module Gitlab
Gitlab::Utils::UsageData::FALLBACK
end
- # calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
- # this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
- def calculate_metrics_intersections(source:, metric_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
- # calculate power of intersection of all given metrics from inclusion exclusion principle
- # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) =>
- # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
- # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
- # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
-
- # calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ...
- subset_powers_data = subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
-
- # calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
- power_of_union_of_all_metrics = begin
- subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||= \
- source.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
- end
-
- # in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
- # is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below
- # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + |A & B & C| =>
- # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
- # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
- # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
- subset_powers_size_even = subset_powers_data.size.even?
-
- # sum all components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... =>
- sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
-
- # add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
- sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics)
- end
-
- def sum_subset_powers(subset_powers_data, subset_powers_size_even)
- sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index|
- (index + 1).odd? ? value : -value
- end
-
- (subset_powers_size_even ? -1 : 1) * sum_without_sign
- end
-
- def subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
- subset_sizes = (1...metric_names.size)
-
- subset_sizes.map do |subset_size|
- if subset_size > 1
- # calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
- metric_names.combination(subset_size).sum do |metrics_subset|
- subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||=
- calculate_metrics_intersections(source: source, metric_names: metrics_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
- end
- else
- # calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
- metric_names.sum do |metric|
- subset_powers_cache[subset_size][metric] ||= \
- source.calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
- end
- end
- end
- end
-
def load_metrics(wildcard)
Dir[wildcard].each_with_object([]) do |path, metrics|
metrics.push(*load_yaml_from_path(path))
@@ -170,4 +109,4 @@ module Gitlab
end
end
-Gitlab::Usage::Metrics::Aggregates::Aggregate.prepend_if_ee('EE::Gitlab::Usage::Metrics::Aggregates::Aggregate')
+Gitlab::Usage::Metrics::Aggregates::Aggregate.prepend_mod_with('Gitlab::Usage::Metrics::Aggregates::Aggregate')