summaryrefslogtreecommitdiff
path: root/lib/gitlab/usage_data_counters/hll_redis_counter.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/usage_data_counters/hll_redis_counter.rb')
-rw-r--r--lib/gitlab/usage_data_counters/hll_redis_counter.rb125
1 files changed, 15 insertions, 110 deletions
diff --git a/lib/gitlab/usage_data_counters/hll_redis_counter.rb b/lib/gitlab/usage_data_counters/hll_redis_counter.rb
index 47361d831b2..ed2ce2cecb0 100644
--- a/lib/gitlab/usage_data_counters/hll_redis_counter.rb
+++ b/lib/gitlab/usage_data_counters/hll_redis_counter.rb
@@ -13,15 +13,10 @@ module Gitlab
AggregationMismatch = Class.new(EventError)
SlotMismatch = Class.new(EventError)
CategoryMismatch = Class.new(EventError)
- UnknownAggregationOperator = Class.new(EventError)
InvalidContext = Class.new(EventError)
KNOWN_EVENTS_PATH = File.expand_path('known_events/*.yml', __dir__)
ALLOWED_AGGREGATIONS = %i(daily weekly).freeze
- UNION_OF_AGGREGATED_METRICS = 'OR'
- INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
- ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
- AGGREGATED_METRICS_PATH = File.expand_path('aggregated_metrics/*.yml', __dir__)
# Track event on entity_id
# Increment a Redis HLL counter for unique event_name and entity_id
@@ -90,37 +85,40 @@ module Gitlab
events_names = events_for_category(category)
event_results = events_names.each_with_object({}) do |event, hash|
- hash["#{event}_weekly"] = unique_events(event_names: [event], start_date: 7.days.ago.to_date, end_date: Date.current)
- hash["#{event}_monthly"] = unique_events(event_names: [event], start_date: 4.weeks.ago.to_date, end_date: Date.current)
+ hash["#{event}_weekly"] = unique_events(**weekly_time_range.merge(event_names: [event]))
+ hash["#{event}_monthly"] = unique_events(**monthly_time_range.merge(event_names: [event]))
end
if eligible_for_totals?(events_names)
- event_results["#{category}_total_unique_counts_weekly"] = unique_events(event_names: events_names, start_date: 7.days.ago.to_date, end_date: Date.current)
- event_results["#{category}_total_unique_counts_monthly"] = unique_events(event_names: events_names, start_date: 4.weeks.ago.to_date, end_date: Date.current)
+ event_results["#{category}_total_unique_counts_weekly"] = unique_events(**weekly_time_range.merge(event_names: events_names))
+ event_results["#{category}_total_unique_counts_monthly"] = unique_events(**monthly_time_range.merge(event_names: events_names))
end
category_results["#{category}"] = event_results
end
end
- def known_event?(event_name)
- event_for(event_name).present?
+ def weekly_time_range
+ { start_date: 7.days.ago.to_date, end_date: Date.current }
end
- def aggregated_metrics_monthly_data
- aggregated_metrics_data(4.weeks.ago.to_date)
+ def monthly_time_range
+ { start_date: 4.weeks.ago.to_date, end_date: Date.current }
end
- def aggregated_metrics_weekly_data
- aggregated_metrics_data(7.days.ago.to_date)
+ def known_event?(event_name)
+ event_for(event_name).present?
end
def known_events
@known_events ||= load_events(KNOWN_EVENTS_PATH)
end
- def aggregated_metrics
- @aggregated_metrics ||= load_events(AGGREGATED_METRICS_PATH)
+ def calculate_events_union(event_names:, start_date:, end_date:)
+ count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
+ raise SlotMismatch, events unless events_in_same_slot?(events)
+ raise AggregationMismatch, events unless events_same_aggregation?(events)
+ end
end
private
@@ -139,93 +137,6 @@ module Gitlab
Plan.all_plans
end
- def aggregated_metrics_data(start_date)
- aggregated_metrics.each_with_object({}) do |aggregation, weekly_data|
- next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)
-
- weekly_data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: Date.current)
- end
- end
-
- def calculate_count_for_aggregation(aggregation, start_date:, end_date:)
- case aggregation[:operator]
- when UNION_OF_AGGREGATED_METRICS
- calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
- when INTERSECTION_OF_AGGREGATED_METRICS
- calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
- else
- raise UnknownAggregationOperator, "Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"
- end
- end
-
- # calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
- # this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
- def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
- # calculate power of intersection of all given metrics from inclusion exclusion principle
- # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) =>
- # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
- # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
- # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
-
- # calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ...
- subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
-
- # calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
- power_of_union_of_all_events = begin
- subset_powers_cache[event_names.size][event_names.join('_+_')] ||= \
- calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date)
- end
-
- # in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
- # is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below
- # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + |A & B & C| =>
- # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
- # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
- # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
- subset_powers_size_even = subset_powers_data.size.even?
-
- # sum all components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... =>
- sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
-
- # add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
- sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events)
- end
-
- def sum_subset_powers(subset_powers_data, subset_powers_size_even)
- sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index|
- (index + 1).odd? ? value : -value
- end
-
- (subset_powers_size_even ? -1 : 1) * sum_without_sign
- end
-
- def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
- subset_sizes = (1..(event_names.size - 1))
-
- subset_sizes.map do |subset_size|
- if subset_size > 1
- # calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
- event_names.combination(subset_size).sum do |events_subset|
- subset_powers_cache[subset_size][events_subset.join('_&_')] ||= \
- calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
- end
- else
- # calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
- event_names.sum do |event|
- subset_powers_cache[subset_size][event] ||= \
- unique_events(event_names: event, start_date: start_date, end_date: end_date)
- end
- end
- end
- end
-
- def calculate_events_union(event_names:, start_date:, end_date:)
- count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
- raise SlotMismatch, events unless events_in_same_slot?(events)
- raise AggregationMismatch, events unless events_same_aggregation?(events)
- end
- end
-
def count_unique_events(event_names:, start_date:, end_date:, context: '')
events = events_for(Array(event_names).map(&:to_s))
@@ -340,12 +251,6 @@ module Gitlab
end.flatten
end
- def validate_aggregation_operator!(operator)
- return true if ALLOWED_METRICS_AGGREGATIONS.include?(operator)
-
- raise UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}")
- end
-
def weekly_redis_keys(events:, start_date:, end_date:, context: '')
end_date = end_date.end_of_week - 1.week
(start_date.to_date..end_date.to_date).map do |date|