diff options
author | Tiago Botelho <tiagonbotelho@hotmail.com> | 2018-02-15 13:23:39 +0000 |
---|---|---|
committer | Tiago Botelho <tiagonbotelho@hotmail.com> | 2018-02-28 10:46:20 +0000 |
commit | 335ee79a73fafdf00fac6e8ffc286ce4bad273ff (patch) | |
tree | d30033fd007c992448ce02f42e46f9da48e223cb | |
parent | 3f31da9c69c550d1698a1376e37d36f4e6e309b5 (diff) | |
download | gitlab-ce-335ee79a73fafdf00fac6e8ffc286ce4bad273ff.tar.gz |
Refactors median code to work with both single and multiple projects
-rw-r--r-- | app/models/cycle_analytics.rb | 4 | ||||
-rw-r--r-- | app/serializers/analytics_stage_entity.rb | 4 | ||||
-rw-r--r-- | lib/gitlab/cycle_analytics/base_query.rb | 4 | ||||
-rw-r--r-- | lib/gitlab/cycle_analytics/base_stage.rb | 8 | ||||
-rw-r--r-- | lib/gitlab/cycle_analytics/usage_data.rb | 48 | ||||
-rw-r--r-- | lib/gitlab/database/median.rb | 140 | ||||
-rw-r--r-- | lib/gitlab/usage_data.rb | 7 | ||||
-rw-r--r-- | spec/features/cycle_analytics_spec.rb | 14 | ||||
-rw-r--r-- | spec/lib/gitlab/cycle_analytics/usage_data_spec.rb | 36 | ||||
-rw-r--r-- | spec/lib/gitlab/database/median_spec.rb | 15 | ||||
-rw-r--r-- | spec/models/cycle_analytics_spec.rb | 30 | ||||
-rw-r--r-- | spec/support/cycle_analytics_helpers.rb | 12 | ||||
-rw-r--r-- | spec/support/cycle_analytics_helpers/test_generation.rb | 2 |
13 files changed, 236 insertions, 88 deletions
diff --git a/app/models/cycle_analytics.rb b/app/models/cycle_analytics.rb index 52eb07ae7d6..b34d1382d43 100644 --- a/app/models/cycle_analytics.rb +++ b/app/models/cycle_analytics.rb @@ -7,8 +7,8 @@ class CycleAnalytics end def all_medians_per_stage - STAGES.each_with_object({}) do |stage_name, hsh| - hsh[stage_name] = self[stage_name].median + STAGES.each_with_object({}) do |stage_name, medians_per_stage| + medians_per_stage[stage_name] = self[stage_name].median end end diff --git a/app/serializers/analytics_stage_entity.rb b/app/serializers/analytics_stage_entity.rb index f87eb78e0e8..a9e331559b1 100644 --- a/app/serializers/analytics_stage_entity.rb +++ b/app/serializers/analytics_stage_entity.rb @@ -7,6 +7,8 @@ class AnalyticsStageEntity < Grape::Entity expose :description expose :median, as: :value do |stage| - distance_of_time_in_words(stage.median) if stage.median && !(stage.median.blank? || stage.median.zero?) + if stage.median && !(stage.median.nil? || stage.median.zero?) + distance_of_time_in_words(stage.median) + end end end diff --git a/lib/gitlab/cycle_analytics/base_query.rb b/lib/gitlab/cycle_analytics/base_query.rb index b9075047487..def9885789e 100644 --- a/lib/gitlab/cycle_analytics/base_query.rb +++ b/lib/gitlab/cycle_analytics/base_query.rb @@ -8,14 +8,14 @@ module Gitlab private def base_query - @base_query ||= stage_query([@project.id]) # rubocop:disable Gitlab/ModuleWithInstanceVariables + @base_query ||= stage_query(@project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables end def stage_query(project_ids) query = mr_closing_issues_table.join(issue_table).on(issue_table[:id].eq(mr_closing_issues_table[:issue_id])) .join(issue_metrics_table).on(issue_table[:id].eq(issue_metrics_table[:issue_id])) .project(issue_table[:project_id].as("project_id")) - .where(issue_table[:project_id].in(project_ids)) + .where(issue_table[:project_id].in(Array(project_ids))) .where(issue_table[:created_at].gteq(@options[:from])) # rubocop:disable Gitlab/ModuleWithInstanceVariables # Load merge_requests diff --git a/lib/gitlab/cycle_analytics/base_stage.rb b/lib/gitlab/cycle_analytics/base_stage.rb index 45386fee622..5c3fd901f79 100644 --- a/lib/gitlab/cycle_analytics/base_stage.rb +++ b/lib/gitlab/cycle_analytics/base_stage.rb @@ -31,8 +31,12 @@ module Gitlab interval_query = Arel::Nodes::As.new(cte_table, subtract_datetimes(stage_query(project_ids), start_time_attrs, end_time_attrs, name.to_s)) - median_datetimes(cte_table, interval_query, name, :project_id)&.each do |project_id, median| - loader.call(project_id, median) + if project_ids.size == 1 + loader.call(@project.id, median_datetime(cte_table, interval_query, name)) + else + median_datetimes(cte_table, interval_query, name, :project_id)&.each do |project_id, median| + loader.call(project_id, median) + end end end end diff --git a/lib/gitlab/cycle_analytics/usage_data.rb b/lib/gitlab/cycle_analytics/usage_data.rb index 0aefd434e14..5122e3417ca 100644 --- a/lib/gitlab/cycle_analytics/usage_data.rb +++ b/lib/gitlab/cycle_analytics/usage_data.rb @@ -5,27 +5,21 @@ module Gitlab attr_reader :projects, :options - def initialize(projects, options) - @projects = projects - @options = options + def initialize + @projects = Project.sorted_by_activity.limit(PROJECTS_LIMIT) + @options = { from: 7.days.ago } end def to_json total = 0 - values = {} - medians_per_stage.each do |stage_name, medians| - medians = medians.map(&:presence).compact + values = + medians_per_stage.each_with_object({}) do |(stage_name, medians), hsh| + calculations = stage_values(medians) - stage_values = { - average: calc_average(medians), - sd: standard_deviation(medians), - missing: projects.length - medians.length - } - - total += stage_values.values.compact.sum - values[stage_name] = stage_values - end + total += calculations.values.compact.sum + hsh[stage_name] = calculations + end values[:total] = total @@ -43,26 +37,36 @@ module Gitlab end end + def stage_values(medians) + medians = medians.map(&:presence).compact + average = calc_average(medians) + + { + average: average, + sd: standard_deviation(medians, average), + missing: projects.length - medians.length + } + end + def calc_average(values) return if values.empty? (values.sum / values.length).to_i end - def sample_variance(values) + def standard_deviation(values, average) + Math.sqrt(sample_variance(values, average)).to_i + end + + def sample_variance(values, average) return 0 if values.length <= 1 - avg = calc_average(values) sum = values.inject(0) do |acc, val| - acc + (val - avg)**2 + acc + (val - average)**2 end sum / (values.length - 1) end - - def standard_deviation(values) - Math.sqrt(sample_variance(values)).to_i - end end end end diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb index de2cb040ad6..2f9dd914674 100644 --- a/lib/gitlab/database/median.rb +++ b/lib/gitlab/database/median.rb @@ -2,32 +2,34 @@ module Gitlab module Database module Median + def median_datetime(arel_table, query_so_far, column_sym) + extract_median(execute_queries(arel_table, query_so_far, column_sym)).presence + end + def median_datetimes(arel_table, query_so_far, column_sym, partition_column) - median_queries = - if Gitlab::Database.postgresql? - pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column) - elsif Gitlab::Database.mysql? - mysql_median_datetime_sql(arel_table, query_so_far, column_sym) - end - - results = Array.wrap(median_queries).map do |query| - ActiveRecord::Base.connection.execute(query) - end - extract_medians(results).presence + extract_medians(execute_queries(arel_table, query_so_far, column_sym, partition_column)).presence end - def extract_medians(results) + def extract_median(results) result = results.compact.first if Gitlab::Database.postgresql? - result.values.map do |id, median| - [id.to_i, median&.to_f] - end.to_h + result = result.first.presence + + result['median']&.to_f if result elsif Gitlab::Database.mysql? result.to_a.flatten.first end end + def extract_medians(results) + return {} if Gitlab::Database.mysql? + + results.compact.first.values.map do |id, median| + [id.to_i, median&.to_f] + end.to_h + end + def mysql_median_datetime_sql(arel_table, query_so_far, column_sym) query = arel_table .from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)) @@ -53,7 +55,7 @@ module Gitlab ] end - def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column) + def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column = nil) # Create a CTE with the column we're operating on, row number (after sorting by the column # we're operating on), and count of the table we're operating on (duplicated across) all rows # of the CTE. For example, if we're looking to find the median of the `projects.star_count` @@ -64,49 +66,107 @@ module Gitlab # 5 | 1 | 3 # 9 | 2 | 3 # 15 | 3 | 3 + # + # If a partition column is used we will do the same operation but for separate partitions, + # when that happens the CTE might look like this: + # + # project_id | star_count | row_id | ct + # ------------+------------+--------+---- + # 1 | 5 | 1 | 2 + # 1 | 9 | 2 | 2 + # 2 | 10 | 1 | 3 + # 2 | 15 | 2 | 3 + # 2 | 20 | 3 | 3 cte_table = Arel::Table.new("ordered_records") + cte = Arel::Nodes::As.new( cte_table, - arel_table - .project( - arel_table[partition_column], - arel_table[column_sym].as(column_sym.to_s), - Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("rank", []), - Arel::Nodes::Window.new.partition(arel_table[partition_column]) - .order(arel_table[column_sym])).as('row_id'), - arel_table.from(arel_table.alias) - .project("COUNT(*)") - .where(arel_table[partition_column].eq(arel_table.alias[partition_column])).as('ct')). + arel_table.project(*rank_rows(arel_table, column_sym, partition_column)). # Disallow negative values where(arel_table[column_sym].gteq(zero_interval))) # From the CTE, select either the middle row or the middle two rows (this is accomplished # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the # selected rows, and this is the median value. - cte_table - .project(cte_table[partition_column]) - .project(average([extract_epoch(cte_table[column_sym])], "median")) - .where( - Arel::Nodes::Between.new( - cte_table[:row_id], - Arel::Nodes::And.new( - [(cte_table[:ct] / Arel.sql('2.0')), - (cte_table[:ct] / Arel.sql('2.0') + 1)] + result = + cte_table + .project(*median_projections(cte_table, column_sym, partition_column)) + .where( + Arel::Nodes::Between.new( + cte_table[:row_id], + Arel::Nodes::And.new( + [(cte_table[:ct] / Arel.sql('2.0')), + (cte_table[:ct] / Arel.sql('2.0') + 1)] + ) ) ) - ) - .with(query_so_far, cte) - .group(cte_table[partition_column]) - .order(cte_table[partition_column]) - .to_sql + .with(query_so_far, cte) + + result.group(cte_table[partition_column]).order(cte_table[partition_column]) if partition_column + + result.to_sql end private + def median_queries(arel_table, query_so_far, column_sym, partition_column = nil) + if Gitlab::Database.postgresql? + pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column) + elsif Gitlab::Database.mysql? + mysql_median_datetime_sql(arel_table, query_so_far, column_sym) + end + end + + def execute_queries(arel_table, query_so_far, column_sym, partition_column = nil) + queries = median_queries(arel_table, query_so_far, column_sym, partition_column) + + Array.wrap(queries).map { |query| ActiveRecord::Base.connection.execute(query) } + end + def average(args, as) Arel::Nodes::NamedFunction.new("AVG", args, as) end + def rank_rows(arel_table, column_sym, partition_column) + column_row = arel_table[column_sym].as(column_sym.to_s) + + if partition_column + partition_row = arel_table[partition_column] + row_id = + Arel::Nodes::Over.new( + Arel::Nodes::NamedFunction.new('rank', []), + Arel::Nodes::Window.new.partition(arel_table[partition_column]) + .order(arel_table[column_sym]) + ).as('row_id') + + count = arel_table.from(arel_table.alias) + .project('COUNT(*)') + .where(arel_table[partition_column].eq(arel_table.alias[partition_column])) + .as('ct') + + [partition_row, column_row, row_id, count] + else + row_id = + Arel::Nodes::Over.new( + Arel::Nodes::NamedFunction.new('row_number', []), + Arel::Nodes::Window.new.order(arel_table[column_sym]) + ).as('row_id') + + count = arel_table.project("COUNT(1)").as('ct') + + [column_row, row_id, count] + end + end + + def median_projections(table, column_sym, partition_column) + if partition_column + [table[partition_column], + average([extract_epoch(table[column_sym])], "median")] + else + [average([extract_epoch(table[column_sym])], "median")] + end + end + def extract_epoch(arel_attribute) Arel.sql(%Q{EXTRACT(EPOCH FROM "#{arel_attribute.relation.name}"."#{arel_attribute.name}")}) end diff --git a/lib/gitlab/usage_data.rb b/lib/gitlab/usage_data.rb index dae0e20e156..37d3512990e 100644 --- a/lib/gitlab/usage_data.rb +++ b/lib/gitlab/usage_data.rb @@ -73,12 +73,7 @@ module Gitlab end def cycle_analytics_usage_data - # We only want to generate this data for instances that use PostgreSQL - return {} if Gitlab::Database.mysql? - - projects = Project.sorted_by_activity.limit(Gitlab::CycleAnalytics::UsageData::PROJECTS_LIMIT) - - Gitlab::CycleAnalytics::UsageData.new(projects, { from: 7.days.ago }).to_json + Gitlab::CycleAnalytics::UsageData.new.to_json end def features_usage_data diff --git a/spec/features/cycle_analytics_spec.rb b/spec/features/cycle_analytics_spec.rb index 510677ecf56..ce7c0cac9d8 100644 --- a/spec/features/cycle_analytics_spec.rb +++ b/spec/features/cycle_analytics_spec.rb @@ -41,7 +41,7 @@ feature 'Cycle Analytics', :js do allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue]) project.add_master(user) - create_cycle + @build = create_cycle(user, project, issue, mr, milestone, pipeline) deploy_master sign_in(user) @@ -117,7 +117,7 @@ feature 'Cycle Analytics', :js do project.add_guest(guest) allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue]) - create_cycle + create_cycle(user, project, issue, mr, milestone, pipeline) deploy_master sign_in(guest) @@ -166,16 +166,6 @@ feature 'Cycle Analytics', :js do expect(find('.stage-events')).to have_content("!#{mr.iid}") end - def create_cycle - issue.update(milestone: milestone) - pipeline.run - - @build = create(:ci_build, pipeline: pipeline, status: :success, author: user) - - merge_merge_requests_closing_issue(issue) - ProcessCommitWorker.new.perform(project.id, user.id, mr.commits.last.to_hash) - end - def click_stage(stage_name) find('.stage-nav li', text: stage_name).click wait_for_requests diff --git a/spec/lib/gitlab/cycle_analytics/usage_data_spec.rb b/spec/lib/gitlab/cycle_analytics/usage_data_spec.rb new file mode 100644 index 00000000000..4815e91665a --- /dev/null +++ b/spec/lib/gitlab/cycle_analytics/usage_data_spec.rb @@ -0,0 +1,36 @@ +require 'spec_helper' + +describe Gitlab::CycleAnalytics::UsageData do + let(:project) { create(:project, :repository) } + let(:user) { create(:user, :admin) } + let(:issue) { create(:issue, project: project, created_at: 2.days.ago) } + let(:milestone) { create(:milestone, project: project) } + let(:mr) { create_merge_request_closing_issue(issue, commit_message: "References #{issue.to_reference}") } + let(:pipeline) { create(:ci_empty_pipeline, status: 'created', project: project, ref: mr.source_branch, sha: mr.source_branch_sha, head_pipeline_of: mr) } + + subject { described_class.new([project]) } + + describe '#to_json' do + before do + allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue]) + + create_cycle(user, project, issue, mr, milestone, pipeline) + deploy_master + end + + it 'returns the aggregated usage data of every selected project' do + result = subject.to_json + avg_cycle_analytics = result[:avg_cycle_analytics] + + expect(result).to have_key(:avg_cycle_analytics) + CycleAnalytics::STAGES.each do |stage_name| + stage_values = avg_cycle_analytics[stage_name] + + expect(avg_cycle_analytics).to have_key(stage_name) + expect(stage_values).to have_key(:average) + expect(stage_values).to have_key(:sd) + expect(stage_values).to have_key(:missing) + end + end + end +end diff --git a/spec/lib/gitlab/database/median_spec.rb b/spec/lib/gitlab/database/median_spec.rb new file mode 100644 index 00000000000..36718a1f47b --- /dev/null +++ b/spec/lib/gitlab/database/median_spec.rb @@ -0,0 +1,15 @@ +require 'spec_helper' + +describe Gitlab::Database::Median do + describe '#extract_medians' do + context 'when using MySQL' do + it 'returns an empty hash' do + values = [["1", "1000"]] + + allow(Gitlab::Database).to receive(:mysql?).and_return(true) + + expect(described_class.new.extract_median(values)).eq({}) + end + end + end +end diff --git a/spec/models/cycle_analytics_spec.rb b/spec/models/cycle_analytics_spec.rb new file mode 100644 index 00000000000..bbb91a4aae5 --- /dev/null +++ b/spec/models/cycle_analytics_spec.rb @@ -0,0 +1,30 @@ +require 'spec_helper' + +describe CycleAnalytics do + let(:project) { create(:project, :repository) } + let(:from_date) { 10.days.ago } + let(:user) { create(:user, :admin) } + let(:issue) { create(:issue, project: project, created_at: 2.days.ago) } + let(:milestone) { create(:milestone, project: project) } + let(:mr) { create_merge_request_closing_issue(issue, commit_message: "References #{issue.to_reference}") } + let(:pipeline) { create(:ci_empty_pipeline, status: 'created', project: project, ref: mr.source_branch, sha: mr.source_branch_sha, head_pipeline_of: mr) } + + subject { described_class.new(project, from: from_date) } + + describe '#all_medians_per_stage' do + before do + allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue]) + + create_cycle(user, project, issue, mr, milestone, pipeline) + deploy_master + end + + it 'returns every median for each stage for a specific project' do + values = described_class::STAGES.each_with_object({}) do |stage_name, hsh| + hsh[stage_name] = subject[stage_name].median.presence + end + + expect(subject.all_medians_per_stage).to eq(values) + end + end +end diff --git a/spec/support/cycle_analytics_helpers.rb b/spec/support/cycle_analytics_helpers.rb index d5ef80cfab2..7440894f1f4 100644 --- a/spec/support/cycle_analytics_helpers.rb +++ b/spec/support/cycle_analytics_helpers.rb @@ -26,6 +26,18 @@ module CycleAnalyticsHelpers ref: 'refs/heads/master').execute end + def create_cycle(user, project, issue, mr, milestone, pipeline) + issue.update(milestone: milestone) + pipeline.run + + ci_build = create(:ci_build, pipeline: pipeline, status: :success, author: user) + + merge_merge_requests_closing_issue(issue) + ProcessCommitWorker.new.perform(project.id, user.id, mr.commits.last.to_hash) + + ci_build + end + def create_merge_request_closing_issue(issue, message: nil, source_branch: nil, commit_message: 'commit message') if !source_branch || project.repository.commit(source_branch).blank? source_branch = generate(:branch) diff --git a/spec/support/cycle_analytics_helpers/test_generation.rb b/spec/support/cycle_analytics_helpers/test_generation.rb index da501a5077a..19b32c84d81 100644 --- a/spec/support/cycle_analytics_helpers/test_generation.rb +++ b/spec/support/cycle_analytics_helpers/test_generation.rb @@ -50,7 +50,7 @@ module CycleAnalyticsHelpers end median_time_difference = time_differences.sort[2] - expect(subject[phase].median.presence).to be_within(5).of(median_time_difference) + expect(subject[phase].median).to be_within(5).of(median_time_difference) end context "when the data belongs to another project" do |