summaryrefslogtreecommitdiff
path: root/lib/gitlab/usage
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2021-02-18 10:34:06 +0000
committerGitLab Bot <gitlab-bot@gitlab.com>2021-02-18 10:34:06 +0000
commit859a6fb938bb9ee2a317c46dfa4fcc1af49608f0 (patch)
treed7f2700abe6b4ffcb2dcfc80631b2d87d0609239 /lib/gitlab/usage
parent446d496a6d000c73a304be52587cd9bbc7493136 (diff)
downloadgitlab-ce-859a6fb938bb9ee2a317c46dfa4fcc1af49608f0.tar.gz
Add latest changes from gitlab-org/gitlab@13-9-stable-ee (tag: v13.9.0-rc42)
Diffstat (limited to 'lib/gitlab/usage')
-rw-r--r--lib/gitlab/usage/docs/helper.rb63
-rw-r--r--lib/gitlab/usage/docs/renderer.rb32
-rw-r--r--lib/gitlab/usage/docs/templates/default.md.haml28
-rw-r--r--lib/gitlab/usage/docs/value_formatter.rb26
-rw-r--r--lib/gitlab/usage/metric.rb10
-rw-r--r--lib/gitlab/usage/metric_definition.rb13
-rw-r--r--lib/gitlab/usage/metrics/aggregates/aggregate.rb157
-rw-r--r--lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb75
-rw-r--r--lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb24
9 files changed, 419 insertions, 9 deletions
diff --git a/lib/gitlab/usage/docs/helper.rb b/lib/gitlab/usage/docs/helper.rb
new file mode 100644
index 00000000000..8483334800b
--- /dev/null
+++ b/lib/gitlab/usage/docs/helper.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Usage
+ module Docs
+ # Helper with functions to be used by HAML templates
+ module Helper
+ HEADER = %w(field value).freeze
+ SKIP_KEYS = %i(description).freeze
+
+ def auto_generated_comment
+ <<-MARKDOWN.strip_heredoc
+ ---
+ stage: Growth
+ group: Product Intelligence
+ info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#designated-technical-writers
+ ---
+
+ <!---
+ This documentation is auto generated by a script.
+
+ Please do not edit this file directly, check generate_metrics_dictionary task on lib/tasks/gitlab/usage_data.rake.
+ --->
+
+ <!-- vale gitlab.Spelling = NO -->
+ MARKDOWN
+ end
+
+ def render_name(name)
+ "## `#{name}`\n"
+ end
+
+ def render_description(object)
+ object.description
+ end
+
+ def render_attribute_row(key, value)
+ value = Gitlab::Usage::Docs::ValueFormatter.format(key, value)
+ table_row(["`#{key}`", value])
+ end
+
+ def render_attributes_table(object)
+ <<~MARKDOWN
+
+ #{table_row(HEADER)}
+ #{table_row(HEADER.map { '---' })}
+ #{table_value_rows(object.attributes)}
+ MARKDOWN
+ end
+
+ def table_value_rows(attributes)
+ attributes.reject { |k, _| k.in?(SKIP_KEYS) }.map do |key, value|
+ render_attribute_row(key, value)
+ end.join("\n")
+ end
+
+ def table_row(array)
+ "| #{array.join(' | ')} |"
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/usage/docs/renderer.rb b/lib/gitlab/usage/docs/renderer.rb
new file mode 100644
index 00000000000..7a7c58005bb
--- /dev/null
+++ b/lib/gitlab/usage/docs/renderer.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Usage
+ module Docs
+ class Renderer
+ include Gitlab::Usage::Docs::Helper
+ DICTIONARY_PATH = Rails.root.join('doc', 'development', 'usage_ping')
+ TEMPLATE_PATH = Rails.root.join('lib', 'gitlab', 'usage', 'docs', 'templates', 'default.md.haml')
+
+ def initialize(metrics_definitions)
+ @layout = Haml::Engine.new(File.read(TEMPLATE_PATH))
+ @metrics_definitions = metrics_definitions.sort
+ end
+
+ # Renders @layout with the metrics definitions and memoizes the result.
+ #
+ # Returns the rendered String with one extra trailing new line removed.
+ def contents
+   # Non-bang `sub` on purpose: `sub!` returns nil when nothing is replaced,
+   # which would memoize nil and make `write` produce an empty file.
+   @contents ||= @layout.render(self, metrics_definitions: @metrics_definitions).sub(/\n(?=\Z)/, '')
+ end
+
+ def write
+ filename = DICTIONARY_PATH.join('dictionary.md').to_s
+
+ FileUtils.mkdir_p(DICTIONARY_PATH)
+ File.write(filename, contents)
+
+ filename
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/usage/docs/templates/default.md.haml b/lib/gitlab/usage/docs/templates/default.md.haml
new file mode 100644
index 00000000000..86e93be66c7
--- /dev/null
+++ b/lib/gitlab/usage/docs/templates/default.md.haml
@@ -0,0 +1,28 @@
+= auto_generated_comment
+
+:plain
+ # Metrics Dictionary
+
+ This file is autogenerated, please do not edit directly.
+
+ To generate these files from the GitLab repository, run:
+
+ ```shell
+ bundle exec rake gitlab:usage_data:generate_metrics_dictionary
+ ```
+
+ The Metrics Dictionary is based on the following metrics definition YAML files:
+
+ - [`config/metrics`](https://gitlab.com/gitlab-org/gitlab/-/tree/master/config/metrics)
+ - [`ee/config/metrics`](https://gitlab.com/gitlab-org/gitlab/-/tree/master/ee/config/metrics)
+
+Each table includes a `milestone`, which corresponds to the GitLab version when the metric
+was released.
+\
+- metrics_definitions.each do |name, object|
+
+ = render_name(name)
+
+ = render_description(object)
+
+ = render_attributes_table(object)
diff --git a/lib/gitlab/usage/docs/value_formatter.rb b/lib/gitlab/usage/docs/value_formatter.rb
new file mode 100644
index 00000000000..a2dc9b081f8
--- /dev/null
+++ b/lib/gitlab/usage/docs/value_formatter.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Usage
+ module Docs
+ class ValueFormatter
+ def self.format(key, value)
+ case key
+ when :key_path
+ "**`#{value}`**"
+ when :data_source
+ value.to_s.capitalize
+ when :product_group
+ "`#{value}`"
+ when :introduced_by_url
+ "[Introduced by](#{value})"
+ when :distribution, :tier
+ Array(value).join(', ')
+ else
+ value
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/usage/metric.rb b/lib/gitlab/usage/metric.rb
index e1648c78168..f3469209f48 100644
--- a/lib/gitlab/usage/metric.rb
+++ b/lib/gitlab/usage/metric.rb
@@ -7,16 +7,16 @@ module Gitlab
InvalidMetricError = Class.new(RuntimeError)
- attr_accessor :default_generation_path, :value
+ attr_accessor :key_path, :value
- validates :default_generation_path, presence: true
+ validates :key_path, presence: true
def definition
- self.class.definitions[default_generation_path]
+ self.class.definitions[key_path]
end
- def unflatten_default_path
- unflatten(default_generation_path.split('.'), value)
+ def unflatten_key_path
+ unflatten(key_path.split('.'), value)
end
class << self
diff --git a/lib/gitlab/usage/metric_definition.rb b/lib/gitlab/usage/metric_definition.rb
index 96e572bb3db..01d202e4d45 100644
--- a/lib/gitlab/usage/metric_definition.rb
+++ b/lib/gitlab/usage/metric_definition.rb
@@ -13,9 +13,8 @@ module Gitlab
@attributes = opts
end
- # The key is defined by default_generation and full_path
def key
- full_path[default_generation.to_sym]
+ key_path
end
def to_h
@@ -23,8 +22,10 @@ module Gitlab
end
def validate!
- self.class.schemer.validate(attributes.stringify_keys).map do |error|
- Gitlab::ErrorTracking.track_and_raise_for_dev_exception(Metric::InvalidMetricError.new("#{error["details"] || error['data_pointer']} for `#{path}`"))
+ unless skip_validation?
+ self.class.schemer.validate(attributes.stringify_keys).each do |error|
+ Gitlab::ErrorTracking.track_and_raise_for_dev_exception(Metric::InvalidMetricError.new("#{error["details"] || error['data_pointer']} for `#{path}`"))
+ end
end
end
@@ -79,6 +80,10 @@ module Gitlab
def method_missing(method, *args)
attributes[method] || super
end
+
+ def skip_validation?
+ !!attributes[:skip_validation]
+ end
end
end
end
diff --git a/lib/gitlab/usage/metrics/aggregates/aggregate.rb b/lib/gitlab/usage/metrics/aggregates/aggregate.rb
new file mode 100644
index 00000000000..1fc40798320
--- /dev/null
+++ b/lib/gitlab/usage/metrics/aggregates/aggregate.rb
@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Usage
+ module Metrics
+ module Aggregates
+ UNION_OF_AGGREGATED_METRICS = 'OR'
+ INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
+ ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
+ AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml')
+ AggregatedMetricError = Class.new(StandardError)
+ UnknownAggregationOperator = Class.new(AggregatedMetricError)
+ UnknownAggregationSource = Class.new(AggregatedMetricError)
+
+ DATABASE_SOURCE = 'database'
+ REDIS_SOURCE = 'redis'
+
+ SOURCES = {
+ DATABASE_SOURCE => Sources::PostgresHll,
+ REDIS_SOURCE => Sources::RedisHll
+ }.freeze
+
+ class Aggregate
+ delegate :weekly_time_range,
+ :monthly_time_range,
+ to: Gitlab::UsageDataCounters::HLLRedisCounter
+
+ def initialize(recorded_at)
+ @aggregated_metrics = load_metrics(AGGREGATED_METRICS_PATH)
+ @recorded_at = recorded_at
+ end
+
+ def monthly_data
+ aggregated_metrics_data(**monthly_time_range)
+ end
+
+ def weekly_data
+ aggregated_metrics_data(**weekly_time_range)
+ end
+
+ private
+
+ attr_accessor :aggregated_metrics, :recorded_at
+
+ def aggregated_metrics_data(start_date:, end_date:)
+ aggregated_metrics.each_with_object({}) do |aggregation, data|
+ next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: :yaml, type: :development)
+
+ case aggregation[:source]
+ when REDIS_SOURCE
+ data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date)
+ when DATABASE_SOURCE
+ next unless Feature.enabled?('database_sourced_aggregated_metrics', default_enabled: false, type: :development)
+
+ data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date)
+ else
+ Gitlab::ErrorTracking
+ .track_and_raise_for_dev_exception(UnknownAggregationSource.new("Aggregation source: '#{aggregation[:source]}' must be included in #{SOURCES.keys}"))
+
+ data[aggregation[:name]] = Gitlab::Utils::UsageData::FALLBACK
+ end
+ end
+ end
+
+ def calculate_count_for_aggregation(aggregation:, start_date:, end_date:)
+ source = SOURCES[aggregation[:source]]
+
+ case aggregation[:operator]
+ when UNION_OF_AGGREGATED_METRICS
+ source.calculate_metrics_union(metric_names: aggregation[:events], start_date: start_date, end_date: end_date, recorded_at: recorded_at)
+ when INTERSECTION_OF_AGGREGATED_METRICS
+ calculate_metrics_intersections(source: source, metric_names: aggregation[:events], start_date: start_date, end_date: end_date)
+ else
+ Gitlab::ErrorTracking
+ .track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"))
+ Gitlab::Utils::UsageData::FALLBACK
+ end
+ rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError, AggregatedMetricError => error
+ Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
+ Gitlab::Utils::UsageData::FALLBACK
+ end
+
+ # calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
+ # this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
+ #
+ # source - object responding to calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:)
+ # metric_names - names of the metrics (sets) to intersect
+ # start_date, end_date - passed through to the source unchanged
+ # subset_powers_cache - memo of already calculated powers, keyed by subset size and then by joined metric names;
+ #   it is handed over to the recursive calls made through subsets_intersection_powers
+ #
+ # Returns the inclusion exclusion equation solved for the power of the intersection of all metric_names
+ #
+ # The default cache builds a separate inner hash per subset size. `Hash.new({})` would hand out one shared
+ # default hash for every size and never store it under its key.
+ def calculate_metrics_intersections(source:, metric_names:, start_date:, end_date:, subset_powers_cache: Hash.new { |cache, subset_size| cache[subset_size] = {} })
+   # calculate power of intersection of all given metrics from inclusion exclusion principle
+   # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + |B & C|) + (|A & B & C|) =>
+   # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + |B & C|) + |A + B + C|
+   # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - |A & B & C & D| =>
+   # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - |A + B + C + D|
+
+   # calculate each component of the equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - ...
+   subset_powers_data = subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
+
+   # calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
+   power_of_union_of_all_metrics =
+     subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||=
+       source.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
+
+   # in order to determine if the part of the equation (|A & B & C|, |A & B & C & D|) that represents the intersection we need to calculate
+   # is positive or negative in a particular equation, we need to determine if the number of subsets is even or odd. Please take a look at the two examples below
+   # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + |B & C|) + |A & B & C| =>
+   # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + |B & C|) + |A + B + C|
+   # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - |A & B & C & D| =>
+   # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - |A + B + C + D|
+   subset_powers_size_even = subset_powers_data.size.even?
+
+   # sum all components of the equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - ... =>
+   sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
+
+   # add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
+   sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics)
+ end
+
+ # Folds the per-subset-size powers into one alternating sum (1st added, 2nd subtracted, 3rd added, ...)
+ # and flips the sign of the whole sum when subset_powers_size_even is truthy.
+ def sum_subset_powers(subset_powers_data, subset_powers_size_even)
+   alternating_sum = subset_powers_data.each_with_index.sum do |power, position|
+     position.even? ? power : -power
+   end
+
+   sign = subset_powers_size_even ? -1 : 1
+   sign * alternating_sum
+ end
+
+ def subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
+ subset_sizes = (1...metric_names.size)
+
+ subset_sizes.map do |subset_size|
+ if subset_size > 1
+ # calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
+ metric_names.combination(subset_size).sum do |metrics_subset|
+ subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||=
+ calculate_metrics_intersections(source: source, metric_names: metrics_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
+ end
+ else
+ # calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
+ metric_names.sum do |metric|
+ subset_powers_cache[subset_size][metric] ||= \
+ source.calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
+ end
+ end
+ end
+ end
+
+ # Gathers the entries returned by load_yaml_from_path for every file matching the wildcard
+ # into one flat array; a file that yields nil contributes nothing.
+ def load_metrics(wildcard)
+   Dir[wildcard].flat_map { |yaml_path| Array(load_yaml_from_path(yaml_path)) }
+ end
+
+ def load_yaml_from_path(path)
+ YAML.safe_load(File.read(path), aliases: true)&.map(&:with_indifferent_access)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb b/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb
new file mode 100644
index 00000000000..33678d2b813
--- /dev/null
+++ b/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Usage
+ module Metrics
+ module Aggregates
+ module Sources
+ class PostgresHll
+ class << self
+ def calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:)
+ time_period = start_date && end_date ? (start_date..end_date) : nil
+
+ Array(metric_names).each_with_object(Gitlab::Database::PostgresHll::Buckets.new) do |event, buckets|
+ json = read_aggregated_metric(metric_name: event, time_period: time_period, recorded_at: recorded_at)
+ raise UnionNotAvailable, "Union data not available for #{metric_names}" unless json
+
+ buckets.merge_hash!(Gitlab::Json.parse(json))
+ end.estimated_distinct_count
+ end
+
+ # Stores HLL buckets for a metric in Redis (shared state) under the key built by redis_key.
+ #
+ # metric_name - name used as the prefix of the Redis key
+ # time_period - object responding to `values` (or nil); only its first value is used for the key
+ # recorded_at_timestamp - converted with `to_i` and appended to the key by redis_key
+ # data - must be a ::Gitlab::Database::PostgresHll::Buckets; it is stored as JSON
+ #
+ # When data has any other type the error is tracked and nothing is written.
+ # A Redis::CommandError raised while writing is rescued and tracked here
+ # (NOTE(review): track_and_raise_for_dev_exception presumably re-raises in dev/test only - confirm).
+ def save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:)
+ unless data.is_a? ::Gitlab::Database::PostgresHll::Buckets
+ Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}"))
+ return
+ end
+
+ # Usage Ping report generation for gitlab.com is a very long-running process.
+ # To make sure that saved keys are still available at the end of report generation,
+ # use triple the max generation time as the key expiration.
+ keys_expiration = ::Gitlab::UsageData::MAX_GENERATION_TIME_FOR_SAAS * 3
+
+ Gitlab::Redis::SharedState.with do |redis|
+ redis.set(
+ redis_key(metric_name: metric_name, time_period: time_period&.values&.first, recorded_at: recorded_at_timestamp),
+ data.to_json,
+ ex: keys_expiration
+ )
+ end
+ rescue ::Redis::CommandError => e
+ Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
+ end
+
+ private
+
+ def read_aggregated_metric(metric_name:, time_period:, recorded_at:)
+ Gitlab::Redis::SharedState.with do |redis|
+ redis.get(redis_key(metric_name: metric_name, time_period: time_period, recorded_at: recorded_at))
+ end
+ end
+
+ def redis_key(metric_name:, time_period:, recorded_at:)
+ # add timestamp at the end of the key to avoid stale keys if
+ # usage ping job is retried
+ "#{metric_name}_#{time_period_to_human_name(time_period)}-#{recorded_at.to_i}"
+ end
+
+ def time_period_to_human_name(time_period)
+ return Gitlab::Utils::UsageData::ALL_TIME_PERIOD_HUMAN_NAME if time_period.blank?
+
+ start_date = time_period.first.to_date
+ end_date = time_period.last.to_date
+
+ if (end_date - start_date).to_i > 7
+ Gitlab::Utils::UsageData::MONTHLY_PERIOD_HUMAN_NAME
+ else
+ Gitlab::Utils::UsageData::WEEKLY_PERIOD_HUMAN_NAME
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb b/lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb
new file mode 100644
index 00000000000..f3a4dcf1e31
--- /dev/null
+++ b/lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Usage
+ module Metrics
+ module Aggregates
+ module Sources
+ UnionNotAvailable = Class.new(AggregatedMetricError)
+
+ class RedisHll
+ def self.calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at: nil)
+ union = Gitlab::UsageDataCounters::HLLRedisCounter
+ .calculate_events_union(event_names: metric_names, start_date: start_date, end_date: end_date)
+
+ return union if union >= 0
+
+ raise UnionNotAvailable, "Union data not available for #{metric_names}"
+ end
+ end
+ end
+ end
+ end
+ end
+end