From a7b3f11edd626b7ba2fe4101951d2c9fe884b04a Mon Sep 17 00:00:00 2001 From: Tiago Botelho Date: Tue, 30 Jan 2018 11:28:15 +0000 Subject: Adds get all medians to Cycle Analytics model --- app/models/cycle_analytics.rb | 6 +++ lib/gitlab/cycle_analytics/base_query.rb | 7 +++- lib/gitlab/cycle_analytics/base_stage.rb | 8 ++-- lib/gitlab/cycle_analytics/usage_data.rb | 64 ++++++++++++++++++++++++++++++++ lib/gitlab/database/median.rb | 25 ++++++++----- lib/gitlab/usage_data.rb | 7 ++++ 6 files changed, 102 insertions(+), 15 deletions(-) create mode 100644 lib/gitlab/cycle_analytics/usage_data.rb diff --git a/app/models/cycle_analytics.rb b/app/models/cycle_analytics.rb index d2e626c22e8..a6191e569a7 100644 --- a/app/models/cycle_analytics.rb +++ b/app/models/cycle_analytics.rb @@ -6,6 +6,12 @@ class CycleAnalytics @options = options end + def self.all_medians_per_stage(projects, options) + STAGES.each_with_object({}) do |stage_name, hsh| + hsh[stage_name] = Gitlab::CycleAnalytics::Stage[stage_name].new(projects: projects, options: options).medians&.values || [] + end + end + def summary @summary ||= ::Gitlab::CycleAnalytics::StageSummary.new(@project, from: @options[:from], diff --git a/lib/gitlab/cycle_analytics/base_query.rb b/lib/gitlab/cycle_analytics/base_query.rb index 8b3bc3e440d..fee46fc0dda 100644 --- a/lib/gitlab/cycle_analytics/base_query.rb +++ b/lib/gitlab/cycle_analytics/base_query.rb @@ -14,7 +14,7 @@ module Gitlab def stage_query query = mr_closing_issues_table.join(issue_table).on(issue_table[:id].eq(mr_closing_issues_table[:issue_id])) .join(issue_metrics_table).on(issue_table[:id].eq(issue_metrics_table[:issue_id])) - .where(issue_table[:project_id].eq(@project.id)) # rubocop:disable Gitlab/ModuleWithInstanceVariables + .where(issue_table[:project_id].in(project_ids)) # rubocop:disable Gitlab/ModuleWithInstanceVariables .where(issue_table[:created_at].gteq(@options[:from])) # rubocop:disable Gitlab/ModuleWithInstanceVariables # Load merge_requests @@ -22,9 +22,14 @@ module Gitlab .on(mr_table[:id].eq(mr_closing_issues_table[:merge_request_id])) .join(mr_metrics_table) .on(mr_table[:id].eq(mr_metrics_table[:merge_request_id])) + .project(issue_table[:project_id].as("project_id")) query end + + def project_ids + @projects.map(&:id) + end end end end diff --git a/lib/gitlab/cycle_analytics/base_stage.rb b/lib/gitlab/cycle_analytics/base_stage.rb index cac31ea8cff..c9de27ec481 100644 --- a/lib/gitlab/cycle_analytics/base_stage.rb +++ b/lib/gitlab/cycle_analytics/base_stage.rb @@ -3,8 +3,8 @@ module Gitlab class BaseStage include BaseQuery - def initialize(project:, options:) - @project = project + def initialize(projects:, options:) + @projects = projects @options = options end @@ -20,7 +20,7 @@ module Gitlab raise NotImplementedError.new("Expected #{self.name} to implement title") end - def median + def medians cte_table = Arel::Table.new("cte_table_for_#{name}") # Build a `SELECT` query. We find the first of the `end_time_attrs` that isn't `NULL` (call this end_time). @@ -31,7 +31,7 @@ module Gitlab cte_table, subtract_datetimes(base_query.dup, start_time_attrs, end_time_attrs, name.to_s)) - median_datetime(cte_table, interval_query, name) + median_datetimes(cte_table, interval_query, name) end def name diff --git a/lib/gitlab/cycle_analytics/usage_data.rb b/lib/gitlab/cycle_analytics/usage_data.rb new file mode 100644 index 00000000000..43ec9f9c493 --- /dev/null +++ b/lib/gitlab/cycle_analytics/usage_data.rb @@ -0,0 +1,64 @@ +module Gitlab + module CycleAnalytics + class UsageData + PROJECTS_LIMIT = 10 + + attr_reader :projects, :options + + def initialize(projects, options) + @projects = projects + @options = options + end + + def to_json + total = 0 + values = {} + + medians_per_stage.each do |stage_name, medians| + medians = medians.compact + + stage_values = { + average: calc_average(medians), + sd: standard_deviation(medians), + missing: projects.length - medians.length + } + + total += stage_values.values.compact.sum + values[stage_name] = stage_values + end + + values[:total] = total + + { avg_cycle_analytics: values } + end + + private + + def medians_per_stage + @medians_per_stage ||= ::CycleAnalytics.all_medians_per_stage(projects, options) + end + + def calc_average(values) + return if values.empty? + + (values.sum / values.length).to_i + end + + def sample_variance(values) + return 0 if values.length <= 1 + + avg = calc_average(values) + sum = values.inject(0) do |acc, val| + acc + (val - avg)**2 + end + + sum / (values.length - 1) + end + + def standard_deviation(values) + Math.sqrt(sample_variance(values)).to_i + end + end + end +end + diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb index 059054ac9ff..84d79cc5e19 100644 --- a/lib/gitlab/database/median.rb +++ b/lib/gitlab/database/median.rb @@ -2,7 +2,7 @@ module Gitlab module Database module Median - def median_datetime(arel_table, query_so_far, column_sym) + def median_datetimes(arel_table, query_so_far, column_sym) median_queries = if Gitlab::Database.postgresql? pg_median_datetime_sql(arel_table, query_so_far, column_sym) @@ -13,16 +13,16 @@ module Gitlab results = Array.wrap(median_queries).map do |query| ActiveRecord::Base.connection.execute(query) end - extract_median(results).presence + extract_medians(results).presence end - def extract_median(results) + def extract_medians(results) result = results.compact.first if Gitlab::Database.postgresql? - result = result.first.presence - median = result['median'] if result - median.to_f if median + result.values.map do |id, median| + [id, median&.to_f] + end.to_h elsif Gitlab::Database.mysql? result.to_a.flatten.first end @@ -69,17 +69,20 @@ module Gitlab cte_table, arel_table .project( + arel_table[:project_id], arel_table[column_sym].as(column_sym.to_s), - Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), - Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), - arel_table.project("COUNT(1)").as('ct')). + Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("rank", []), + Arel::Nodes::Window.new.partition(arel_table[:project_id]) + .order(arel_table[column_sym])).as('row_id'), + arel_table.from(arel_table.alias).project("COUNT(*)").where(arel_table[:project_id].eq(arel_table.alias[:project_id])).as('ct')). # Disallow negative values where(arel_table[column_sym].gteq(zero_interval))) # From the CTE, select either the middle row or the middle two rows (this is accomplished # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the # selected rows, and this is the median value. - cte_table.project(average([extract_epoch(cte_table[column_sym])], "median")) + cte_table.project(cte_table[:project_id]) + .project(average([extract_epoch(cte_table[column_sym])], "median")) .where( Arel::Nodes::Between.new( cte_table[:row_id], @@ -90,6 +93,8 @@ module Gitlab ) ) .with(query_so_far, cte) + .group(cte_table[:project_id]) + .order(cte_table[:project_id]) .to_sql end diff --git a/lib/gitlab/usage_data.rb b/lib/gitlab/usage_data.rb index 9d13d1d781f..1fa000e933c 100644 --- a/lib/gitlab/usage_data.rb +++ b/lib/gitlab/usage_data.rb @@ -9,6 +9,7 @@ module Gitlab license_usage_data.merge(system_usage_data) .merge(features_usage_data) .merge(components_usage_data) + .merge(cycle_analytics_usage_data) end def to_json(force_refresh: false) @@ -71,6 +72,12 @@ module Gitlab } end + def cycle_analytics_usage_data + projects = Project.sorted_by_activity.limit(Gitlab::CycleAnalytics::UsageData::PROJECTS_LIMIT) + + Gitlab::CycleAnalytics::UsageData.new(projects, { from: 7.days.ago }).to_json + end + def features_usage_data features_usage_data_ce end -- cgit v1.2.1