summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTiago Botelho <tiagonbotelho@hotmail.com>2018-02-15 13:23:39 +0000
committerTiago Botelho <tiagonbotelho@hotmail.com>2018-02-28 10:46:20 +0000
commit335ee79a73fafdf00fac6e8ffc286ce4bad273ff (patch)
treed30033fd007c992448ce02f42e46f9da48e223cb
parent3f31da9c69c550d1698a1376e37d36f4e6e309b5 (diff)
downloadgitlab-ce-335ee79a73fafdf00fac6e8ffc286ce4bad273ff.tar.gz
Refactors median code to work with both single and multiple projects
-rw-r--r--app/models/cycle_analytics.rb4
-rw-r--r--app/serializers/analytics_stage_entity.rb4
-rw-r--r--lib/gitlab/cycle_analytics/base_query.rb4
-rw-r--r--lib/gitlab/cycle_analytics/base_stage.rb8
-rw-r--r--lib/gitlab/cycle_analytics/usage_data.rb48
-rw-r--r--lib/gitlab/database/median.rb140
-rw-r--r--lib/gitlab/usage_data.rb7
-rw-r--r--spec/features/cycle_analytics_spec.rb14
-rw-r--r--spec/lib/gitlab/cycle_analytics/usage_data_spec.rb36
-rw-r--r--spec/lib/gitlab/database/median_spec.rb15
-rw-r--r--spec/models/cycle_analytics_spec.rb30
-rw-r--r--spec/support/cycle_analytics_helpers.rb12
-rw-r--r--spec/support/cycle_analytics_helpers/test_generation.rb2
13 files changed, 236 insertions, 88 deletions
diff --git a/app/models/cycle_analytics.rb b/app/models/cycle_analytics.rb
index 52eb07ae7d6..b34d1382d43 100644
--- a/app/models/cycle_analytics.rb
+++ b/app/models/cycle_analytics.rb
@@ -7,8 +7,8 @@ class CycleAnalytics
end
def all_medians_per_stage
- STAGES.each_with_object({}) do |stage_name, hsh|
- hsh[stage_name] = self[stage_name].median
+ STAGES.each_with_object({}) do |stage_name, medians_per_stage|
+ medians_per_stage[stage_name] = self[stage_name].median
end
end
diff --git a/app/serializers/analytics_stage_entity.rb b/app/serializers/analytics_stage_entity.rb
index f87eb78e0e8..a9e331559b1 100644
--- a/app/serializers/analytics_stage_entity.rb
+++ b/app/serializers/analytics_stage_entity.rb
@@ -7,6 +7,8 @@ class AnalyticsStageEntity < Grape::Entity
expose :description
expose :median, as: :value do |stage|
- distance_of_time_in_words(stage.median) if stage.median && !(stage.median.blank? || stage.median.zero?)
+ if stage.median && !(stage.median.nil? || stage.median.zero?)
+ distance_of_time_in_words(stage.median)
+ end
end
end
diff --git a/lib/gitlab/cycle_analytics/base_query.rb b/lib/gitlab/cycle_analytics/base_query.rb
index b9075047487..def9885789e 100644
--- a/lib/gitlab/cycle_analytics/base_query.rb
+++ b/lib/gitlab/cycle_analytics/base_query.rb
@@ -8,14 +8,14 @@ module Gitlab
private
def base_query
- @base_query ||= stage_query([@project.id]) # rubocop:disable Gitlab/ModuleWithInstanceVariables
+ @base_query ||= stage_query(@project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
def stage_query(project_ids)
query = mr_closing_issues_table.join(issue_table).on(issue_table[:id].eq(mr_closing_issues_table[:issue_id]))
.join(issue_metrics_table).on(issue_table[:id].eq(issue_metrics_table[:issue_id]))
.project(issue_table[:project_id].as("project_id"))
- .where(issue_table[:project_id].in(project_ids))
+ .where(issue_table[:project_id].in(Array(project_ids)))
.where(issue_table[:created_at].gteq(@options[:from])) # rubocop:disable Gitlab/ModuleWithInstanceVariables
# Load merge_requests
diff --git a/lib/gitlab/cycle_analytics/base_stage.rb b/lib/gitlab/cycle_analytics/base_stage.rb
index 45386fee622..5c3fd901f79 100644
--- a/lib/gitlab/cycle_analytics/base_stage.rb
+++ b/lib/gitlab/cycle_analytics/base_stage.rb
@@ -31,8 +31,12 @@ module Gitlab
interval_query = Arel::Nodes::As.new(cte_table,
subtract_datetimes(stage_query(project_ids), start_time_attrs, end_time_attrs, name.to_s))
- median_datetimes(cte_table, interval_query, name, :project_id)&.each do |project_id, median|
- loader.call(project_id, median)
+ if project_ids.size == 1
+ loader.call(@project.id, median_datetime(cte_table, interval_query, name))
+ else
+ median_datetimes(cte_table, interval_query, name, :project_id)&.each do |project_id, median|
+ loader.call(project_id, median)
+ end
end
end
end
diff --git a/lib/gitlab/cycle_analytics/usage_data.rb b/lib/gitlab/cycle_analytics/usage_data.rb
index 0aefd434e14..5122e3417ca 100644
--- a/lib/gitlab/cycle_analytics/usage_data.rb
+++ b/lib/gitlab/cycle_analytics/usage_data.rb
@@ -5,27 +5,21 @@ module Gitlab
attr_reader :projects, :options
- def initialize(projects, options)
- @projects = projects
- @options = options
+ def initialize
+ @projects = Project.sorted_by_activity.limit(PROJECTS_LIMIT)
+ @options = { from: 7.days.ago }
end
def to_json
total = 0
- values = {}
- medians_per_stage.each do |stage_name, medians|
- medians = medians.map(&:presence).compact
+ values =
+ medians_per_stage.each_with_object({}) do |(stage_name, medians), hsh|
+ calculations = stage_values(medians)
- stage_values = {
- average: calc_average(medians),
- sd: standard_deviation(medians),
- missing: projects.length - medians.length
- }
-
- total += stage_values.values.compact.sum
- values[stage_name] = stage_values
- end
+ total += calculations.values.compact.sum
+ hsh[stage_name] = calculations
+ end
values[:total] = total
@@ -43,26 +37,36 @@ module Gitlab
end
end
+ def stage_values(medians)
+ medians = medians.map(&:presence).compact
+ average = calc_average(medians)
+
+ {
+ average: average,
+ sd: standard_deviation(medians, average),
+ missing: projects.length - medians.length
+ }
+ end
+
def calc_average(values)
return if values.empty?
(values.sum / values.length).to_i
end
- def sample_variance(values)
+ def standard_deviation(values, average)
+ Math.sqrt(sample_variance(values, average)).to_i
+ end
+
+ def sample_variance(values, average)
return 0 if values.length <= 1
- avg = calc_average(values)
sum = values.inject(0) do |acc, val|
- acc + (val - avg)**2
+ acc + (val - average)**2
end
sum / (values.length - 1)
end
-
- def standard_deviation(values)
- Math.sqrt(sample_variance(values)).to_i
- end
end
end
end
diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb
index de2cb040ad6..2f9dd914674 100644
--- a/lib/gitlab/database/median.rb
+++ b/lib/gitlab/database/median.rb
@@ -2,32 +2,34 @@
module Gitlab
module Database
module Median
+ def median_datetime(arel_table, query_so_far, column_sym)
+ extract_median(execute_queries(arel_table, query_so_far, column_sym)).presence
+ end
+
def median_datetimes(arel_table, query_so_far, column_sym, partition_column)
- median_queries =
- if Gitlab::Database.postgresql?
- pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column)
- elsif Gitlab::Database.mysql?
- mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
- end
-
- results = Array.wrap(median_queries).map do |query|
- ActiveRecord::Base.connection.execute(query)
- end
- extract_medians(results).presence
+ extract_medians(execute_queries(arel_table, query_so_far, column_sym, partition_column)).presence
end
- def extract_medians(results)
+ def extract_median(results)
result = results.compact.first
if Gitlab::Database.postgresql?
- result.values.map do |id, median|
- [id.to_i, median&.to_f]
- end.to_h
+ result = result.first.presence
+
+ result['median']&.to_f if result
elsif Gitlab::Database.mysql?
result.to_a.flatten.first
end
end
+ def extract_medians(results)
+ return {} if Gitlab::Database.mysql?
+
+ results.compact.first.values.map do |id, median|
+ [id.to_i, median&.to_f]
+ end.to_h
+ end
+
def mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
query = arel_table
.from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name))
@@ -53,7 +55,7 @@ module Gitlab
]
end
- def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column)
+ def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column = nil)
# Create a CTE with the column we're operating on, row number (after sorting by the column
# we're operating on), and count of the table we're operating on (duplicated across) all rows
# of the CTE. For example, if we're looking to find the median of the `projects.star_count`
@@ -64,49 +66,107 @@ module Gitlab
# 5 | 1 | 3
# 9 | 2 | 3
# 15 | 3 | 3
+ #
+ # If a partition column is used we will do the same operation but for separate partitions,
+ # when that happens the CTE might look like this:
+ #
+ # project_id | star_count | row_id | ct
+ # ------------+------------+--------+----
+ # 1 | 5 | 1 | 2
+ # 1 | 9 | 2 | 2
+ # 2 | 10 | 1 | 3
+ # 2 | 15 | 2 | 3
+ # 2 | 20 | 3 | 3
cte_table = Arel::Table.new("ordered_records")
+
cte = Arel::Nodes::As.new(
cte_table,
- arel_table
- .project(
- arel_table[partition_column],
- arel_table[column_sym].as(column_sym.to_s),
- Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("rank", []),
- Arel::Nodes::Window.new.partition(arel_table[partition_column])
- .order(arel_table[column_sym])).as('row_id'),
- arel_table.from(arel_table.alias)
- .project("COUNT(*)")
- .where(arel_table[partition_column].eq(arel_table.alias[partition_column])).as('ct')).
+ arel_table.project(*rank_rows(arel_table, column_sym, partition_column)).
# Disallow negative values
where(arel_table[column_sym].gteq(zero_interval)))
# From the CTE, select either the middle row or the middle two rows (this is accomplished
# by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
# selected rows, and this is the median value.
- cte_table
- .project(cte_table[partition_column])
- .project(average([extract_epoch(cte_table[column_sym])], "median"))
- .where(
- Arel::Nodes::Between.new(
- cte_table[:row_id],
- Arel::Nodes::And.new(
- [(cte_table[:ct] / Arel.sql('2.0')),
- (cte_table[:ct] / Arel.sql('2.0') + 1)]
+ result =
+ cte_table
+ .project(*median_projections(cte_table, column_sym, partition_column))
+ .where(
+ Arel::Nodes::Between.new(
+ cte_table[:row_id],
+ Arel::Nodes::And.new(
+ [(cte_table[:ct] / Arel.sql('2.0')),
+ (cte_table[:ct] / Arel.sql('2.0') + 1)]
+ )
)
)
- )
- .with(query_so_far, cte)
- .group(cte_table[partition_column])
- .order(cte_table[partition_column])
- .to_sql
+ .with(query_so_far, cte)
+
+ result.group(cte_table[partition_column]).order(cte_table[partition_column]) if partition_column
+
+ result.to_sql
end
private
+ def median_queries(arel_table, query_so_far, column_sym, partition_column = nil)
+ if Gitlab::Database.postgresql?
+ pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column)
+ elsif Gitlab::Database.mysql?
+ mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
+ end
+ end
+
+ def execute_queries(arel_table, query_so_far, column_sym, partition_column = nil)
+ queries = median_queries(arel_table, query_so_far, column_sym, partition_column)
+
+ Array.wrap(queries).map { |query| ActiveRecord::Base.connection.execute(query) }
+ end
+
def average(args, as)
Arel::Nodes::NamedFunction.new("AVG", args, as)
end
+ def rank_rows(arel_table, column_sym, partition_column)
+ column_row = arel_table[column_sym].as(column_sym.to_s)
+
+ if partition_column
+ partition_row = arel_table[partition_column]
+ row_id =
+ Arel::Nodes::Over.new(
+ Arel::Nodes::NamedFunction.new('rank', []),
+ Arel::Nodes::Window.new.partition(arel_table[partition_column])
+ .order(arel_table[column_sym])
+ ).as('row_id')
+
+ count = arel_table.from(arel_table.alias)
+ .project('COUNT(*)')
+ .where(arel_table[partition_column].eq(arel_table.alias[partition_column]))
+ .as('ct')
+
+ [partition_row, column_row, row_id, count]
+ else
+ row_id =
+ Arel::Nodes::Over.new(
+ Arel::Nodes::NamedFunction.new('row_number', []),
+ Arel::Nodes::Window.new.order(arel_table[column_sym])
+ ).as('row_id')
+
+ count = arel_table.project("COUNT(1)").as('ct')
+
+ [column_row, row_id, count]
+ end
+ end
+
+ def median_projections(table, column_sym, partition_column)
+ if partition_column
+ [table[partition_column],
+ average([extract_epoch(table[column_sym])], "median")]
+ else
+ [average([extract_epoch(table[column_sym])], "median")]
+ end
+ end
+
def extract_epoch(arel_attribute)
Arel.sql(%Q{EXTRACT(EPOCH FROM "#{arel_attribute.relation.name}"."#{arel_attribute.name}")})
end
diff --git a/lib/gitlab/usage_data.rb b/lib/gitlab/usage_data.rb
index dae0e20e156..37d3512990e 100644
--- a/lib/gitlab/usage_data.rb
+++ b/lib/gitlab/usage_data.rb
@@ -73,12 +73,7 @@ module Gitlab
end
def cycle_analytics_usage_data
- # We only want to generate this data for instances that use PostgreSQL
- return {} if Gitlab::Database.mysql?
-
- projects = Project.sorted_by_activity.limit(Gitlab::CycleAnalytics::UsageData::PROJECTS_LIMIT)
-
- Gitlab::CycleAnalytics::UsageData.new(projects, { from: 7.days.ago }).to_json
+ Gitlab::CycleAnalytics::UsageData.new.to_json
end
def features_usage_data
diff --git a/spec/features/cycle_analytics_spec.rb b/spec/features/cycle_analytics_spec.rb
index 510677ecf56..ce7c0cac9d8 100644
--- a/spec/features/cycle_analytics_spec.rb
+++ b/spec/features/cycle_analytics_spec.rb
@@ -41,7 +41,7 @@ feature 'Cycle Analytics', :js do
allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])
project.add_master(user)
- create_cycle
+ @build = create_cycle(user, project, issue, mr, milestone, pipeline)
deploy_master
sign_in(user)
@@ -117,7 +117,7 @@ feature 'Cycle Analytics', :js do
project.add_guest(guest)
allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])
- create_cycle
+ create_cycle(user, project, issue, mr, milestone, pipeline)
deploy_master
sign_in(guest)
@@ -166,16 +166,6 @@ feature 'Cycle Analytics', :js do
expect(find('.stage-events')).to have_content("!#{mr.iid}")
end
- def create_cycle
- issue.update(milestone: milestone)
- pipeline.run
-
- @build = create(:ci_build, pipeline: pipeline, status: :success, author: user)
-
- merge_merge_requests_closing_issue(issue)
- ProcessCommitWorker.new.perform(project.id, user.id, mr.commits.last.to_hash)
- end
-
def click_stage(stage_name)
find('.stage-nav li', text: stage_name).click
wait_for_requests
diff --git a/spec/lib/gitlab/cycle_analytics/usage_data_spec.rb b/spec/lib/gitlab/cycle_analytics/usage_data_spec.rb
new file mode 100644
index 00000000000..4815e91665a
--- /dev/null
+++ b/spec/lib/gitlab/cycle_analytics/usage_data_spec.rb
@@ -0,0 +1,36 @@
+require 'spec_helper'
+
+describe Gitlab::CycleAnalytics::UsageData do
+ let(:project) { create(:project, :repository) }
+ let(:user) { create(:user, :admin) }
+ let(:issue) { create(:issue, project: project, created_at: 2.days.ago) }
+ let(:milestone) { create(:milestone, project: project) }
+ let(:mr) { create_merge_request_closing_issue(issue, commit_message: "References #{issue.to_reference}") }
+ let(:pipeline) { create(:ci_empty_pipeline, status: 'created', project: project, ref: mr.source_branch, sha: mr.source_branch_sha, head_pipeline_of: mr) }
+
+ subject { described_class.new([project]) }
+
+ describe '#to_json' do
+ before do
+ allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])
+
+ create_cycle(user, project, issue, mr, milestone, pipeline)
+ deploy_master
+ end
+
+ it 'returns the aggregated usage data of every selected project' do
+ result = subject.to_json
+ avg_cycle_analytics = result[:avg_cycle_analytics]
+
+ expect(result).to have_key(:avg_cycle_analytics)
+ CycleAnalytics::STAGES.each do |stage_name|
+ stage_values = avg_cycle_analytics[stage_name]
+
+ expect(avg_cycle_analytics).to have_key(stage_name)
+ expect(stage_values).to have_key(:average)
+ expect(stage_values).to have_key(:sd)
+ expect(stage_values).to have_key(:missing)
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/database/median_spec.rb b/spec/lib/gitlab/database/median_spec.rb
new file mode 100644
index 00000000000..36718a1f47b
--- /dev/null
+++ b/spec/lib/gitlab/database/median_spec.rb
@@ -0,0 +1,15 @@
+require 'spec_helper'
+
+describe Gitlab::Database::Median do
+ describe '#extract_medians' do
+ context 'when using MySQL' do
+ it 'returns an empty hash' do
+ values = [["1", "1000"]]
+
+ allow(Gitlab::Database).to receive(:mysql?).and_return(true)
+
+ expect(described_class.new.extract_median(values)).eq({})
+ end
+ end
+ end
+end
diff --git a/spec/models/cycle_analytics_spec.rb b/spec/models/cycle_analytics_spec.rb
new file mode 100644
index 00000000000..bbb91a4aae5
--- /dev/null
+++ b/spec/models/cycle_analytics_spec.rb
@@ -0,0 +1,30 @@
+require 'spec_helper'
+
+describe CycleAnalytics do
+ let(:project) { create(:project, :repository) }
+ let(:from_date) { 10.days.ago }
+ let(:user) { create(:user, :admin) }
+ let(:issue) { create(:issue, project: project, created_at: 2.days.ago) }
+ let(:milestone) { create(:milestone, project: project) }
+ let(:mr) { create_merge_request_closing_issue(issue, commit_message: "References #{issue.to_reference}") }
+ let(:pipeline) { create(:ci_empty_pipeline, status: 'created', project: project, ref: mr.source_branch, sha: mr.source_branch_sha, head_pipeline_of: mr) }
+
+ subject { described_class.new(project, from: from_date) }
+
+ describe '#all_medians_per_stage' do
+ before do
+ allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])
+
+ create_cycle(user, project, issue, mr, milestone, pipeline)
+ deploy_master
+ end
+
+ it 'returns every median for each stage for a specific project' do
+ values = described_class::STAGES.each_with_object({}) do |stage_name, hsh|
+ hsh[stage_name] = subject[stage_name].median.presence
+ end
+
+ expect(subject.all_medians_per_stage).to eq(values)
+ end
+ end
+end
diff --git a/spec/support/cycle_analytics_helpers.rb b/spec/support/cycle_analytics_helpers.rb
index d5ef80cfab2..7440894f1f4 100644
--- a/spec/support/cycle_analytics_helpers.rb
+++ b/spec/support/cycle_analytics_helpers.rb
@@ -26,6 +26,18 @@ module CycleAnalyticsHelpers
ref: 'refs/heads/master').execute
end
+ def create_cycle(user, project, issue, mr, milestone, pipeline)
+ issue.update(milestone: milestone)
+ pipeline.run
+
+ ci_build = create(:ci_build, pipeline: pipeline, status: :success, author: user)
+
+ merge_merge_requests_closing_issue(issue)
+ ProcessCommitWorker.new.perform(project.id, user.id, mr.commits.last.to_hash)
+
+ ci_build
+ end
+
def create_merge_request_closing_issue(issue, message: nil, source_branch: nil, commit_message: 'commit message')
if !source_branch || project.repository.commit(source_branch).blank?
source_branch = generate(:branch)
diff --git a/spec/support/cycle_analytics_helpers/test_generation.rb b/spec/support/cycle_analytics_helpers/test_generation.rb
index da501a5077a..19b32c84d81 100644
--- a/spec/support/cycle_analytics_helpers/test_generation.rb
+++ b/spec/support/cycle_analytics_helpers/test_generation.rb
@@ -50,7 +50,7 @@ module CycleAnalyticsHelpers
end
median_time_difference = time_differences.sort[2]
- expect(subject[phase].median.presence).to be_within(5).of(median_time_difference)
+ expect(subject[phase].median).to be_within(5).of(median_time_difference)
end
context "when the data belongs to another project" do