diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2021-02-18 10:34:06 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2021-02-18 10:34:06 +0000 |
commit | 859a6fb938bb9ee2a317c46dfa4fcc1af49608f0 (patch) | |
tree | d7f2700abe6b4ffcb2dcfc80631b2d87d0609239 /lib/gitlab/usage | |
parent | 446d496a6d000c73a304be52587cd9bbc7493136 (diff) | |
download | gitlab-ce-859a6fb938bb9ee2a317c46dfa4fcc1af49608f0.tar.gz |
Add latest changes from gitlab-org/gitlab@13-9-stable-ee (tag: v13.9.0-rc42)
Diffstat (limited to 'lib/gitlab/usage')
-rw-r--r-- | lib/gitlab/usage/docs/helper.rb | 63 | ||||
-rw-r--r-- | lib/gitlab/usage/docs/renderer.rb | 32 | ||||
-rw-r--r-- | lib/gitlab/usage/docs/templates/default.md.haml | 28 | ||||
-rw-r--r-- | lib/gitlab/usage/docs/value_formatter.rb | 26 | ||||
-rw-r--r-- | lib/gitlab/usage/metric.rb | 10 | ||||
-rw-r--r-- | lib/gitlab/usage/metric_definition.rb | 13 | ||||
-rw-r--r-- | lib/gitlab/usage/metrics/aggregates/aggregate.rb | 157 | ||||
-rw-r--r-- | lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb | 75 | ||||
-rw-r--r-- | lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb | 24 |
9 files changed, 419 insertions, 9 deletions
diff --git a/lib/gitlab/usage/docs/helper.rb b/lib/gitlab/usage/docs/helper.rb new file mode 100644 index 00000000000..8483334800b --- /dev/null +++ b/lib/gitlab/usage/docs/helper.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module Gitlab + module Usage + module Docs + # Helper with functions to be used by HAML templates + module Helper + HEADER = %w(field value).freeze + SKIP_KEYS = %i(description).freeze + + def auto_generated_comment + <<-MARKDOWN.strip_heredoc + --- + stage: Growth + group: Product Intelligence + info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#designated-technical-writers + --- + + <!--- + This documentation is auto generated by a script. + + Please do not edit this file directly, check generate_metrics_dictionary task on lib/tasks/gitlab/usage_data.rake. + ---> + + <!-- vale gitlab.Spelling = NO --> + MARKDOWN + end + + def render_name(name) + "## `#{name}`\n" + end + + def render_description(object) + object.description + end + + def render_attribute_row(key, value) + value = Gitlab::Usage::Docs::ValueFormatter.format(key, value) + table_row(["`#{key}`", value]) + end + + def render_attributes_table(object) + <<~MARKDOWN + + #{table_row(HEADER)} + #{table_row(HEADER.map { '---' })} + #{table_value_rows(object.attributes)} + MARKDOWN + end + + def table_value_rows(attributes) + attributes.reject { |k, _| k.in?(SKIP_KEYS) }.map do |key, value| + render_attribute_row(key, value) + end.join("\n") + end + + def table_row(array) + "| #{array.join(' | ')} |" + end + end + end + end +end diff --git a/lib/gitlab/usage/docs/renderer.rb b/lib/gitlab/usage/docs/renderer.rb new file mode 100644 index 00000000000..7a7c58005bb --- /dev/null +++ b/lib/gitlab/usage/docs/renderer.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Gitlab + module Usage + module Docs + class Renderer + include Gitlab::Usage::Docs::Helper + 
DICTIONARY_PATH = Rails.root.join('doc', 'development', 'usage_ping') + TEMPLATE_PATH = Rails.root.join('lib', 'gitlab', 'usage', 'docs', 'templates', 'default.md.haml') + + def initialize(metrics_definitions) + @layout = Haml::Engine.new(File.read(TEMPLATE_PATH)) + @metrics_definitions = metrics_definitions.sort + end + + def contents + # Render and remove an extra trailing new line + @contents ||= @layout.render(self, metrics_definitions: @metrics_definitions).sub!(/\n(?=\Z)/, '') + end + + def write + filename = DICTIONARY_PATH.join('dictionary.md').to_s + + FileUtils.mkdir_p(DICTIONARY_PATH) + File.write(filename, contents) + + filename + end + end + end + end +end diff --git a/lib/gitlab/usage/docs/templates/default.md.haml b/lib/gitlab/usage/docs/templates/default.md.haml new file mode 100644 index 00000000000..86e93be66c7 --- /dev/null +++ b/lib/gitlab/usage/docs/templates/default.md.haml @@ -0,0 +1,28 @@ += auto_generated_comment + +:plain + # Metrics Dictionary + + This file is autogenerated, please do not edit directly. + + To generate these files from the GitLab repository, run: + + ```shell + bundle exec rake gitlab:usage_data:generate_metrics_dictionary + ``` + + The Metrics Dictionary is based on the following metrics definition YAML files: + + - [`config/metrics`]('https://gitlab.com/gitlab-org/gitlab/-/tree/master/config/metrics') + - [`ee/config/metrics`](https://gitlab.com/gitlab-org/gitlab/-/tree/master/ee/config/metrics) + +Each table includes a `milestone`, which corresponds to the GitLab version when the metric +was released. 
+\ +- metrics_definitions.each do |name, object| + + = render_name(name) + + = render_description(object) + + = render_attributes_table(object) diff --git a/lib/gitlab/usage/docs/value_formatter.rb b/lib/gitlab/usage/docs/value_formatter.rb new file mode 100644 index 00000000000..a2dc9b081f8 --- /dev/null +++ b/lib/gitlab/usage/docs/value_formatter.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module Gitlab + module Usage + module Docs + class ValueFormatter + def self.format(key, value) + case key + when :key_path + "**`#{value}`**" + when :data_source + value.to_s.capitalize + when :product_group + "`#{value}`" + when :introduced_by_url + "[Introduced by](#{value})" + when :distribution, :tier + Array(value).join(', ') + else + value + end + end + end + end + end +end diff --git a/lib/gitlab/usage/metric.rb b/lib/gitlab/usage/metric.rb index e1648c78168..f3469209f48 100644 --- a/lib/gitlab/usage/metric.rb +++ b/lib/gitlab/usage/metric.rb @@ -7,16 +7,16 @@ module Gitlab InvalidMetricError = Class.new(RuntimeError) - attr_accessor :default_generation_path, :value + attr_accessor :key_path, :value - validates :default_generation_path, presence: true + validates :key_path, presence: true def definition - self.class.definitions[default_generation_path] + self.class.definitions[key_path] end - def unflatten_default_path - unflatten(default_generation_path.split('.'), value) + def unflatten_key_path + unflatten(key_path.split('.'), value) end class << self diff --git a/lib/gitlab/usage/metric_definition.rb b/lib/gitlab/usage/metric_definition.rb index 96e572bb3db..01d202e4d45 100644 --- a/lib/gitlab/usage/metric_definition.rb +++ b/lib/gitlab/usage/metric_definition.rb @@ -13,9 +13,8 @@ module Gitlab @attributes = opts end - # The key is defined by default_generation and full_path def key - full_path[default_generation.to_sym] + key_path end def to_h @@ -23,8 +22,10 @@ module Gitlab end def validate! 
- self.class.schemer.validate(attributes.stringify_keys).map do |error| - Gitlab::ErrorTracking.track_and_raise_for_dev_exception(Metric::InvalidMetricError.new("#{error["details"] || error['data_pointer']} for `#{path}`")) + unless skip_validation? + self.class.schemer.validate(attributes.stringify_keys).each do |error| + Gitlab::ErrorTracking.track_and_raise_for_dev_exception(Metric::InvalidMetricError.new("#{error["details"] || error['data_pointer']} for `#{path}`")) + end end end @@ -79,6 +80,10 @@ module Gitlab def method_missing(method, *args) attributes[method] || super end + + def skip_validation? + !!attributes[:skip_validation] + end end end end diff --git a/lib/gitlab/usage/metrics/aggregates/aggregate.rb b/lib/gitlab/usage/metrics/aggregates/aggregate.rb new file mode 100644 index 00000000000..1fc40798320 --- /dev/null +++ b/lib/gitlab/usage/metrics/aggregates/aggregate.rb @@ -0,0 +1,157 @@ +# frozen_string_literal: true + +module Gitlab + module Usage + module Metrics + module Aggregates + UNION_OF_AGGREGATED_METRICS = 'OR' + INTERSECTION_OF_AGGREGATED_METRICS = 'AND' + ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze + AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml') + AggregatedMetricError = Class.new(StandardError) + UnknownAggregationOperator = Class.new(AggregatedMetricError) + UnknownAggregationSource = Class.new(AggregatedMetricError) + + DATABASE_SOURCE = 'database' + REDIS_SOURCE = 'redis' + + SOURCES = { + DATABASE_SOURCE => Sources::PostgresHll, + REDIS_SOURCE => Sources::RedisHll + }.freeze + + class Aggregate + delegate :weekly_time_range, + :monthly_time_range, + to: Gitlab::UsageDataCounters::HLLRedisCounter + + def initialize(recorded_at) + @aggregated_metrics = load_metrics(AGGREGATED_METRICS_PATH) + @recorded_at = recorded_at + end + + def monthly_data + aggregated_metrics_data(**monthly_time_range) + end + + def weekly_data + 
aggregated_metrics_data(**weekly_time_range) + end + + private + + attr_accessor :aggregated_metrics, :recorded_at + + def aggregated_metrics_data(start_date:, end_date:) + aggregated_metrics.each_with_object({}) do |aggregation, data| + next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: :yaml, type: :development) + + case aggregation[:source] + when REDIS_SOURCE + data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date) + when DATABASE_SOURCE + next unless Feature.enabled?('database_sourced_aggregated_metrics', default_enabled: false, type: :development) + + data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date) + else + Gitlab::ErrorTracking + .track_and_raise_for_dev_exception(UnknownAggregationSource.new("Aggregation source: '#{aggregation[:source]}' must be included in #{SOURCES.keys}")) + + data[aggregation[:name]] = Gitlab::Utils::UsageData::FALLBACK + end + end + end + + def calculate_count_for_aggregation(aggregation:, start_date:, end_date:) + source = SOURCES[aggregation[:source]] + + case aggregation[:operator] + when UNION_OF_AGGREGATED_METRICS + source.calculate_metrics_union(metric_names: aggregation[:events], start_date: start_date, end_date: end_date, recorded_at: recorded_at) + when INTERSECTION_OF_AGGREGATED_METRICS + calculate_metrics_intersections(source: source, metric_names: aggregation[:events], start_date: start_date, end_date: end_date) + else + Gitlab::ErrorTracking + .track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}")) + Gitlab::Utils::UsageData::FALLBACK + end + rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError, AggregatedMetricError => error + Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error) + 
Gitlab::Utils::UsageData::FALLBACK + end + + # calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle + # this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391 + def calculate_metrics_intersections(source:, metric_names:, start_date:, end_date:, subset_powers_cache: Hash.new({})) + # calculate power of intersection of all given metrics from inclusion exclusion principle + # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) => + # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C| + # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| => + # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D| + + # calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... + subset_powers_data = subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache) + + # calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D| + power_of_union_of_all_metrics = begin + subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||= \ + source.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at) + end + + # in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate, + # is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below + # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. 
+ |C & D|) + |A & B & C| => + # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C| + # |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| => + # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D| + subset_powers_size_even = subset_powers_data.size.even? + + # sum all components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... => + sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even) + + # add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D| + sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics) + end + + def sum_subset_powers(subset_powers_data, subset_powers_size_even) + sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index| + (index + 1).odd? ? value : -value + end + + (subset_powers_size_even ? -1 : 1) * sum_without_sign + end + + def subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache) + subset_sizes = (1...metric_names.size) + + subset_sizes.map do |subset_size| + if subset_size > 1 + # calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|) + metric_names.combination(subset_size).sum do |metrics_subset| + subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||= + calculate_metrics_intersections(source: source, metric_names: metrics_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache) + end + else + # calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ... 
+ metric_names.sum do |metric| + subset_powers_cache[subset_size][metric] ||= \ + source.calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at) + end + end + end + end + + def load_metrics(wildcard) + Dir[wildcard].each_with_object([]) do |path, metrics| + metrics.push(*load_yaml_from_path(path)) + end + end + + def load_yaml_from_path(path) + YAML.safe_load(File.read(path), aliases: true)&.map(&:with_indifferent_access) + end + end + end + end + end +end diff --git a/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb b/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb new file mode 100644 index 00000000000..33678d2b813 --- /dev/null +++ b/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +module Gitlab + module Usage + module Metrics + module Aggregates + module Sources + class PostgresHll + class << self + def calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:) + time_period = start_date && end_date ? (start_date..end_date) : nil + + Array(metric_names).each_with_object(Gitlab::Database::PostgresHll::Buckets.new) do |event, buckets| + json = read_aggregated_metric(metric_name: event, time_period: time_period, recorded_at: recorded_at) + raise UnionNotAvailable, "Union data not available for #{metric_names}" unless json + + buckets.merge_hash!(Gitlab::Json.parse(json)) + end.estimated_distinct_count + end + + def save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:) + unless data.is_a? 
::Gitlab::Database::PostgresHll::Buckets + Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}")) + return + end + + # Usage Ping report generation for gitlab.com is very long running process + # to make sure that saved keys are available at the end of report generation process + # lets use triple max generation time + keys_expiration = ::Gitlab::UsageData::MAX_GENERATION_TIME_FOR_SAAS * 3 + + Gitlab::Redis::SharedState.with do |redis| + redis.set( + redis_key(metric_name: metric_name, time_period: time_period&.values&.first, recorded_at: recorded_at_timestamp), + data.to_json, + ex: keys_expiration + ) + end + rescue ::Redis::CommandError => e + Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) + end + + private + + def read_aggregated_metric(metric_name:, time_period:, recorded_at:) + Gitlab::Redis::SharedState.with do |redis| + redis.get(redis_key(metric_name: metric_name, time_period: time_period, recorded_at: recorded_at)) + end + end + + def redis_key(metric_name:, time_period:, recorded_at:) + # add timestamp at the end of the key to avoid stale keys if + # usage ping job is retried + "#{metric_name}_#{time_period_to_human_name(time_period)}-#{recorded_at.to_i}" + end + + def time_period_to_human_name(time_period) + return Gitlab::Utils::UsageData::ALL_TIME_PERIOD_HUMAN_NAME if time_period.blank? 
+ + start_date = time_period.first.to_date + end_date = time_period.last.to_date + + if (end_date - start_date).to_i > 7 + Gitlab::Utils::UsageData::MONTHLY_PERIOD_HUMAN_NAME + else + Gitlab::Utils::UsageData::WEEKLY_PERIOD_HUMAN_NAME + end + end + end + end + end + end + end + end +end diff --git a/lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb b/lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb new file mode 100644 index 00000000000..f3a4dcf1e31 --- /dev/null +++ b/lib/gitlab/usage/metrics/aggregates/sources/redis_hll.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Gitlab + module Usage + module Metrics + module Aggregates + module Sources + UnionNotAvailable = Class.new(AggregatedMetricError) + + class RedisHll + def self.calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at: nil) + union = Gitlab::UsageDataCounters::HLLRedisCounter + .calculate_events_union(event_names: metric_names, start_date: start_date, end_date: end_date) + + return union if union >= 0 + + raise UnionNotAvailable, "Union data not available for #{metric_names}" + end + end + end + end + end + end +end |