diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2020-08-19 18:10:34 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2020-08-19 18:10:34 +0000 |
commit | 2f5731cf536deff075d1011814f271cbb1ed67e2 (patch) | |
tree | f6e6dec098a60039b1413dae64d24c0bf55bf03d /lib | |
parent | 74b5b3ffcb9fe4d9424bc2bf35e43f749f76d023 (diff) | |
download | gitlab-ce-2f5731cf536deff075d1011814f271cbb1ed67e2.tar.gz |
Add latest changes from gitlab-org/gitlab@master
Diffstat (limited to 'lib')
-rw-r--r-- | lib/extracts_ref.rb | 75 | ||||
-rw-r--r-- | lib/gitlab/analytics/unique_visits.rb | 69 | ||||
-rw-r--r-- | lib/gitlab/metrics/dashboard/validator.rb | 14 | ||||
-rw-r--r-- | lib/gitlab/metrics/dashboard/validator/client.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/metrics/dashboard/validator/schemas/panel.json | 2 | ||||
-rw-r--r-- | lib/gitlab/repository_cache_adapter.rb | 16 | ||||
-rw-r--r-- | lib/gitlab/usage_data.rb | 8 | ||||
-rw-r--r-- | lib/gitlab/usage_data_counters/hll_redis_counter.rb | 149 | ||||
-rw-r--r-- | lib/gitlab/usage_data_counters/known_events.yml | 88 |
9 files changed, 311 insertions, 112 deletions
diff --git a/lib/extracts_ref.rb b/lib/extracts_ref.rb index adbbe6c0e50..5ef2d888550 100644 --- a/lib/extracts_ref.rb +++ b/lib/extracts_ref.rb @@ -40,50 +40,11 @@ module ExtractsRef # Returns an Array where the first value is the tree-ish and the second is the # path def extract_ref(id) - pair = ['', ''] - - return pair unless repository_container - - if id =~ /^(\h{40})(.+)/ - # If the ref appears to be a SHA, we're done, just split the string - pair = $~.captures - elsif id.exclude?('/') - # If the ID contains no slash, we must have a ref and no path, so - # we can skip the Redis calls below - pair = [id, ''] - else - # Otherwise, attempt to detect the ref using a list of the repository_container's - # branches and tags - - # Append a trailing slash if we only get a ref and no file path - unless id.ends_with?('/') - id = [id, '/'].join - end - - first_path_segment, rest = id.split('/', 2) - - if use_first_path_segment?(first_path_segment) - pair = [first_path_segment, rest] - else - valid_refs = ref_names.select { |v| id.start_with?("#{v}/") } - - if valid_refs.empty? - # No exact ref match, so just try our best - pair = id.match(%r{([^/]+)(.*)}).captures - else - # There is a distinct possibility that multiple refs prefix the ID. - # Use the longest match to maximize the chance that we have the - # right ref. - best_match = valid_refs.max_by(&:length) - # Partition the string into the ref and the path, ignoring the empty first value - pair = id.partition(best_match)[1..-1] - end - end - end + pair = extract_raw_ref(id) [ pair[0].strip, - pair[1].gsub(%r{^/|/$}, '') # Remove leading and trailing slashes from path + pair[1].delete_prefix('/').delete_suffix('/') ] end @@ -117,6 +78,38 @@ module ExtractsRef private + def extract_raw_ref(id) + return ['', ''] unless repository_container + + # If the ref appears to be a SHA, we're done, just split the string + return $~.captures if id =~ /^(\h{40})(.+)/ + + # No slash means we must have a ref and no path + return [id, ''] unless id.include?('/') + + # Otherwise, attempt to detect the ref using a list of the + # repository_container's branches and tags + + # Append a trailing slash if we only get a ref and no file path + id = [id, '/'].join unless id.ends_with?('/') + first_path_segment, rest = id.split('/', 2) + + return [first_path_segment, rest] if use_first_path_segment?(first_path_segment) + + valid_refs = ref_names.select { |v| id.start_with?("#{v}/") } + + # No exact ref match, so just try our best + return id.match(%r{([^/]+)(.*)}).captures if valid_refs.empty? + + # There is a distinct possibility that multiple refs prefix the ID. + # Use the longest match to maximize the chance that we have the + # right ref. + best_match = valid_refs.max_by(&:length) + + # Partition the string into the ref and the path, ignoring the empty first value + id.partition(best_match)[1..-1] + end + def use_first_path_segment?(ref) return false unless ::Feature.enabled?(:extracts_path_optimization) return false unless repository_container diff --git a/lib/gitlab/analytics/unique_visits.rb b/lib/gitlab/analytics/unique_visits.rb index 33ea6644fb0..ad746ebbd42 100644 --- a/lib/gitlab/analytics/unique_visits.rb +++ b/lib/gitlab/analytics/unique_visits.rb @@ -3,77 +3,36 @@ module Gitlab module Analytics class UniqueVisits - ANALYTICS_IDS = Set[ - 'g_analytics_contribution', - 'g_analytics_insights', - 'g_analytics_issues', - 'g_analytics_productivity', - 'g_analytics_valuestream', - 'p_analytics_pipelines', - 'p_analytics_code_reviews', - 'p_analytics_valuestream', - 'p_analytics_insights', - 'p_analytics_issues', - 'p_analytics_repo', - 'i_analytics_cohorts', - 'i_analytics_dev_ops_score' - ] - - COMPLIANCE_IDS = Set[ - 'g_compliance_dashboard', - 'g_compliance_audit_events', - 'i_compliance_credential_inventory', - 'i_compliance_audit_events' - ].freeze - - KEY_EXPIRY_LENGTH = 12.weeks - def track_visit(visitor_id, target_id, time = Time.zone.now) - target_key = key(target_id, time) - - Gitlab::Redis::HLL.add(key: target_key, value: visitor_id, expiry: KEY_EXPIRY_LENGTH) + Gitlab::UsageDataCounters::HLLRedisCounter.track_event(visitor_id, target_id, time) end # Returns number of unique visitors for given targets in given time frame # # @param [String, Array[<String>]] targets ids of targets to count visits on. Special case for :any - # @param [ActiveSupport::TimeWithZone] start_week start of time frame - # @param [Integer] weeks time frame length in weeks + # @param [ActiveSupport::TimeWithZone] start_date start of time frame + # @param [ActiveSupport::TimeWithZone] end_date end of time frame # @return [Integer] number of unique visitors - def unique_visits_for(targets:, start_week: 7.days.ago, weeks: 1) + def unique_visits_for(targets:, start_date: 7.days.ago, end_date: start_date + 1.week) target_ids = if targets == :analytics - ANALYTICS_IDS + self.class.analytics_ids elsif targets == :compliance - COMPLIANCE_IDS + self.class.compliance_ids else Array(targets) end - timeframe_start = [start_week, weeks.weeks.ago].min - - redis_keys = keys(targets: target_ids, timeframe_start: timeframe_start, weeks: weeks) - - Gitlab::Redis::HLL.count(keys: redis_keys) + Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: target_ids, start_date: start_date, end_date: end_date) end - private - - def key(target_id, time) - target_ids = ANALYTICS_IDS + COMPLIANCE_IDS - - raise "Invalid target id #{target_id}" unless target_ids.include?(target_id.to_s) - - target_key = target_id.to_s.gsub('analytics', '{analytics}').gsub('compliance', '{compliance}') - - year_week = time.strftime('%G-%V') - - "#{target_key}-#{year_week}" - end + class << self + def analytics_ids + Gitlab::UsageDataCounters::HLLRedisCounter.events_for_category('analytics') + end - def keys(targets:, timeframe_start:, weeks:) - (0..(weeks - 1)).map do |week_increment| - targets.map { |target_id| key(target_id, timeframe_start + week_increment * 7.days) } - end.flatten + def compliance_ids + Gitlab::UsageDataCounters::HLLRedisCounter.events_for_category('compliance') + end end end end diff --git a/lib/gitlab/metrics/dashboard/validator.rb b/lib/gitlab/metrics/dashboard/validator.rb index a2450c59886..8edd9c397f9 100644 --- a/lib/gitlab/metrics/dashboard/validator.rb +++ b/lib/gitlab/metrics/dashboard/validator.rb @@ -8,18 +8,20 @@ module Gitlab class << self def validate(content, schema_path = DASHBOARD_SCHEMA_PATH, dashboard_path: nil, project: nil) - errors(content, schema_path, dashboard_path: dashboard_path, project: project).empty? + errors = _validate(content, schema_path, dashboard_path: dashboard_path, project: project) + errors.empty? end def validate!(content, schema_path = DASHBOARD_SCHEMA_PATH, dashboard_path: nil, project: nil) - errors = errors(content, schema_path, dashboard_path: dashboard_path, project: project) + errors = _validate(content, schema_path, dashboard_path: dashboard_path, project: project) errors.empty? || raise(errors.first) end - def errors(content, schema_path = DASHBOARD_SCHEMA_PATH, dashboard_path: nil, project: nil) - Validator::Client - .new(content, schema_path, dashboard_path: dashboard_path, project: project) - .execute + private + + def _validate(content, schema_path, dashboard_path: nil, project: nil) + client = Validator::Client.new(content, schema_path, dashboard_path: dashboard_path, project: project) + client.execute end end end diff --git a/lib/gitlab/metrics/dashboard/validator/client.rb b/lib/gitlab/metrics/dashboard/validator/client.rb index 588c677ca28..c63415abcfc 100644 --- a/lib/gitlab/metrics/dashboard/validator/client.rb +++ b/lib/gitlab/metrics/dashboard/validator/client.rb @@ -46,7 +46,7 @@ module Gitlab def validate_against_schema schemer.validate(content).map do |error| - ::Gitlab::Metrics::Dashboard::Validator::Errors::SchemaValidationError.new(error) + Errors::SchemaValidationError.new(error) end end end diff --git a/lib/gitlab/metrics/dashboard/validator/schemas/panel.json b/lib/gitlab/metrics/dashboard/validator/schemas/panel.json index 2ae9608036e..011eef53e40 100644 --- a/lib/gitlab/metrics/dashboard/validator/schemas/panel.json +++ b/lib/gitlab/metrics/dashboard/validator/schemas/panel.json @@ -4,7 +4,7 @@ "properties": { "type": { "type": "string", - "enum": ["area-chart", "line-chart", "anomaly-chart", "bar", "column", "stacked-column", "single-stat", "heatmap", "gauge"], + "enum": ["area-chart", "anomaly-chart", "bar", "column", "stacked-column", "single-stat", "heatmap"], "default": "area-chart" }, "title": { "type": "string" }, diff --git a/lib/gitlab/repository_cache_adapter.rb b/lib/gitlab/repository_cache_adapter.rb index da8025d2265..f6a5c6ed754 100644 --- a/lib/gitlab/repository_cache_adapter.rb +++ b/lib/gitlab/repository_cache_adapter.rb @@ -58,11 +58,19 @@ module Gitlab # wrong answer. We handle that by querying the full list - which fills # the cache - and using it directly to answer the question. define_method("#{name}_include?") do |value| - if strong_memoized?(name) || !redis_set_cache.exist?(name) - return __send__(name).include?(value) # rubocop:disable GitlabSecurity/PublicSend - end + ivar = "@#{name}_include" + memoized = instance_variable_get(ivar) || {} + + next memoized[value] if memoized.key?(value) + + memoized[value] = + if strong_memoized?(name) || !redis_set_cache.exist?(name) + __send__(name).include?(value) # rubocop:disable GitlabSecurity/PublicSend + else + redis_set_cache.include?(name, value) + end - redis_set_cache.include?(name, value) + instance_variable_set(ivar, memoized)[value] end end diff --git a/lib/gitlab/usage_data.rb b/lib/gitlab/usage_data.rb index 73a80155dbc..70efe86143e 100644 --- a/lib/gitlab/usage_data.rb +++ b/lib/gitlab/usage_data.rb @@ -584,21 +584,21 @@ module Gitlab end def analytics_unique_visits_data - results = ::Gitlab::Analytics::UniqueVisits::ANALYTICS_IDS.each_with_object({}) do |target_id, hash| + results = ::Gitlab::Analytics::UniqueVisits.analytics_ids.each_with_object({}) do |target_id, hash| hash[target_id] = redis_usage_data { unique_visit_service.unique_visits_for(targets: target_id) } end results['analytics_unique_visits_for_any_target'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics) } - results['analytics_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics, weeks: 4) } + results['analytics_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics, start_date: 4.weeks.ago.to_date, end_date: Date.current) } { analytics_unique_visits: results } end def compliance_unique_visits_data - results = ::Gitlab::Analytics::UniqueVisits::COMPLIANCE_IDS.each_with_object({}) do |target_id, hash| + results = ::Gitlab::Analytics::UniqueVisits.compliance_ids.each_with_object({}) do |target_id, hash| hash[target_id] = redis_usage_data { unique_visit_service.unique_visits_for(targets: target_id) } end results['compliance_unique_visits_for_any_target'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance) } - results['compliance_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance, weeks: 4) } + results['compliance_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance, start_date: 4.weeks.ago.to_date, end_date: Date.current) } { compliance_unique_visits: results } end diff --git a/lib/gitlab/usage_data_counters/hll_redis_counter.rb b/lib/gitlab/usage_data_counters/hll_redis_counter.rb new file mode 100644 index 00000000000..c9c39225068 --- /dev/null +++ b/lib/gitlab/usage_data_counters/hll_redis_counter.rb @@ -0,0 +1,149 @@ +# frozen_string_literal: true + +module Gitlab + module UsageDataCounters + module HLLRedisCounter + DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH = 6.weeks + DEFAULT_DAILY_KEY_EXPIRY_LENGTH = 29.days + DEFAULT_REDIS_SLOT = ''.freeze + + UnknownEvent = Class.new(StandardError) + UnknownAggregation = Class.new(StandardError) + + KNOWN_EVENTS_PATH = 'lib/gitlab/usage_data_counters/known_events.yml'.freeze + ALLOWED_AGGREGATIONS = %i(daily weekly).freeze + + # Track event on entity_id + # Increment a Redis HLL counter for unique event_name and entity_id + # + # All events should be added to know_events file lib/gitlab/usage_data_counters/known_events.yml + # + # Event example: + # + # - name: g_compliance_dashboard # Unique event name + # redis_slot: compliance # Optional slot name, if not defined it will use name as a slot, used for totals + # category: compliance # Group events in categories + # expiry: 29 # Optional expiration time in days, default value 29 days for daily and 6.weeks for weekly + # aggregation: daily # Aggregation level, keys are stored daily or weekly + # + # Usage: + # + # * Track event: Gitlab::UsageDataCounters::HLLRedisCounter.track_event(user_id, 'g_compliance_dashboard') + # * Get unique counts per user: Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: 'g_compliance_dashboard', start_date: 28.days.ago, end_date: Date.current) + class << self + def track_event(entity_id, event_name, time = Time.zone.now) + event = event_for(event_name) + + raise UnknownEvent.new("Unknown event #{event_name}") unless event.present? + + Gitlab::Redis::HLL.add(key: redis_key(event, time), value: entity_id, expiry: expiry(event)) + end + + def unique_events(event_names:, start_date:, end_date:) + events = events_for(Array(event_names)) + + raise 'Events should be in same slot' unless events_in_same_slot?(events) + raise 'Events should be in same category' unless events_in_same_category?(events) + raise 'Events should have same aggregation level' unless events_same_aggregation?(events) + + aggregation = events.first[:aggregation] + + keys = keys_for_aggregation(aggregation, events: events, start_date: start_date, end_date: end_date) + + Gitlab::Redis::HLL.count(keys: keys) + end + + def events_for_category(category) + known_events.select { |event| event[:category] == category }.map { |event| event[:name] } + end + + private + + def keys_for_aggregation(aggregation, events:, start_date:, end_date:) + if aggregation.to_sym == :daily + daily_redis_keys(events: events, start_date: start_date, end_date: end_date) + else + weekly_redis_keys(events: events, start_date: start_date, end_date: end_date) + end + end + + def known_events + @known_events ||= YAML.load_file(Rails.root.join(KNOWN_EVENTS_PATH)).map(&:with_indifferent_access) + end + + def known_events_names + known_events.map { |event| event[:name] } + end + + def events_in_same_slot?(events) + slot = events.first[:redis_slot] + events.all? { |event| event[:redis_slot] == slot } + end + + def events_in_same_category?(events) + category = events.first[:category] + events.all? { |event| event[:category] == category } + end + + def events_same_aggregation?(events) + aggregation = events.first[:aggregation] + events.all? { |event| event[:aggregation] == aggregation } + end + + def expiry(event) + return event[:expiry] if event[:expiry].present? + + event[:aggregation].to_sym == :daily ? DEFAULT_DAILY_KEY_EXPIRY_LENGTH : DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH + end + + def event_for(event_name) + known_events.find { |event| event[:name] == event_name } + end + + def events_for(event_names) + known_events.select { |event| event_names.include?(event[:name]) } + end + + def redis_slot(event) + event[:redis_slot] || DEFAULT_REDIS_SLOT + end + + # Compose the key in order to store events daily or weekly + def redis_key(event, time) + raise UnknownEvent.new("Unknown event #{event[:name]}") unless known_events_names.include?(event[:name].to_s) + raise UnknownAggregation.new("Use :daily or :weekly aggregation") unless ALLOWED_AGGREGATIONS.include?(event[:aggregation].to_sym) + + slot = redis_slot(event) + key = if slot.present? + event[:name].to_s.gsub(slot, "{#{slot}}") + else + "{#{event[:name]}}" + end + + if event[:aggregation].to_sym == :daily + year_day = time.strftime('%G-%j') + "#{year_day}-#{key}" + else + year_week = time.strftime('%G-%V') + "#{key}-#{year_week}" + end + end + + def daily_redis_keys(events:, start_date:, end_date:) + (start_date.to_date..end_date.to_date).map do |date| + events.map { |event| redis_key(event, date) } + end.flatten + end + + def weekly_redis_keys(events:, start_date:, end_date:) + weeks = end_date.to_date.cweek - start_date.to_date.cweek + weeks = 1 if weeks == 0 + + (0..(weeks - 1)).map do |week_increment| + events.map { |event| redis_key(event, start_date + week_increment * 7.days) } + end.flatten + end + end + end + end +end diff --git a/lib/gitlab/usage_data_counters/known_events.yml b/lib/gitlab/usage_data_counters/known_events.yml new file mode 100644 index 00000000000..b7e516fa8b1 --- /dev/null +++ b/lib/gitlab/usage_data_counters/known_events.yml @@ -0,0 +1,88 @@ +--- +# Compliance category +- name: g_compliance_dashboard + redis_slot: compliance + category: compliance + expiry: 84 # expiration time in days, equivalent to 12 weeks + aggregation: weekly +- name: g_compliance_audit_events + category: compliance + redis_slot: compliance + expiry: 84 + aggregation: weekly +- name: i_compliance_audit_events + category: compliance + redis_slot: compliance + expiry: 84 + aggregation: weekly +- name: i_compliance_credential_inventory + category: compliance + redis_slot: compliance + expiry: 84 + aggregation: weekly +# Analytics category +- name: g_analytics_contribution + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: g_analytics_insights + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: g_analytics_issues + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: g_analytics_productivity + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: g_analytics_valuestream + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: p_analytics_pipelines + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: p_analytics_code_reviews + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: p_analytics_valuestream + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: p_analytics_insights + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: p_analytics_issues + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: p_analytics_repo + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: i_analytics_cohorts + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly +- name: i_analytics_dev_ops_score + category: analytics + redis_slot: analytics + expiry: 84 + aggregation: weekly |