summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorStan Hu <stanhu@gmail.com>2016-09-21 05:05:02 +0000
committerStan Hu <stanhu@gmail.com>2016-09-21 05:05:02 +0000
commit5416ab8a0df000bfa9f853840d44d992a975db83 (patch)
tree0fc99b8b83235de0db18b967aef9ec1feacfe1f3 /lib
parent0c7f38bd5b59458a94a9637e06287c8bbbaec82d (diff)
parent244ec0a84c969454bfa05f66dedb22f2b1172323 (diff)
downloadgitlab-ce-5416ab8a0df000bfa9f853840d44d992a975db83.tar.gz
Merge branch '21170-cycle-analytics' into 'master'
Cycle Analytics: first iteration ## What does this MR do? - Implement the first iteration of the "Cycle Analytics" feature. ## What are the relevant issue numbers? - Closes #21170 ## Screenshots ![cycle_analytics_screencast.gif](/uploads/d23c3c912caa6935fd47b53ca3a56b97/cycle_analytics.gif) ## Backend Tasks - [x] Implementation - [x] Phases - [x] Issue (Tracker) - [x] Plan (Board) - [x] Code (IDE) - [x] Test (CI) - [x] Review (MR) - [x] Staging (CD) - [x] Production (Total) - [x] Make heuristics more modular - [x] Scope to project - [x] Date range (30 days, 90 days) - [x] Access restriction - [x] Test - [x] Find a better way to test these phases - [x] Phases - [x] Issue (Tracker) - [x] Plan (Board) - [x] Code (IDE) - [x] Test (CI) - [x] Review (MR) - [x] Staging (CD) - [x] Production (Total) - [x] Test for "end case happens before start case" - [x] Consolidate helper - [x] Miniboss review - [x] Performance testing with mock data - [x] Improve performance - [x] Pre-calculate "merge requests closing issues - [x] Pre-calculate everything else - [x] Test performance against 10k issues - [x] Test all pre-calculation code - [x] Ci::Pipeline -> build start/finish - [x] Ci::Pipeline#merge_requests - [x] Issue -> record default metrics after save - [x] MergeRequest -> record default metrics after save - [x] Deployment -> Update "first_deployed_to_production_at" for MR metrics - [x] Git Push -> Update "first commit mention" for issue metrics - [x] Merge request create/update/refresh -> Update "merge requests closing issues" - [x] Remove `MergeRequestsClosingIssues` when necessary - [x] Changes to unblock Fatih - [x] Add summary data - [x] `stats` should be array - [x] Let `stats` be `null` if all `stats` are null - [x] Indexes for "merge requests closing issues" - [x] Test summary data - [x] Scope everything to project - [x] Find out why tests were passing - [x] Filter should include issues/MRs which have made it to production within the range - [x] Don't create duplicate `MergeRequestsClosingIssues` - [x] Fix tests - [x] MySQL median - [x] Assign to Douwe for review - [x] Fix conflicts - [x] Implement suggestions from Yorick's review - [x] Test on PG - [x] Test on MySQL - [x] Refactor - [x] Cleanup - [x] What happens if we have no data at all? - [x] Extract common queries to methods / scopes - [x] Remove unused queries - [x] Downtime for foreign key migrations - [x] Find a way around "if issue.metrics.present?" all over the place - [x] Find a way around "if merge_request.metrics.present?" all over the place - [x] Test migrations on a fresh database - [x] MySQL - [x] Pg - [x] Access issues - While the project is public and the visibility is set to "Everyone with access", you cannot visit the cycle analytics page when signed out. - [x] CHANGELOG - [x] Implement suggestions from Douwe's review - [x] First set of comments - [x] Second set of comments - [x] Third set of comments - [x] Fourth set of comments - [x] Make sure build is green - [ ] Make issue for "polish" - [ ] EE MR See merge request !5986
Diffstat (limited to 'lib')
-rw-r--r--lib/gitlab/database/date_time.rb27
-rw-r--r--lib/gitlab/database/median.rb112
2 files changed, 139 insertions, 0 deletions
diff --git a/lib/gitlab/database/date_time.rb b/lib/gitlab/database/date_time.rb
new file mode 100644
index 00000000000..b6a89f715fd
--- /dev/null
+++ b/lib/gitlab/database/date_time.rb
@@ -0,0 +1,27 @@
+module Gitlab
+ module Database
+ module DateTime
+ # Find the first of the `end_time_attrs` that isn't `NULL`. Subtract from it
+ # the first of the `start_time_attrs` that isn't NULL. `SELECT` the resulting interval
+ # along with an alias specified by the `as` parameter.
+ #
+ # Note: For MySQL, the interval is returned in seconds.
+ # For PostgreSQL, the interval is returned as an INTERVAL type.
+ def subtract_datetimes(query_so_far, end_time_attrs, start_time_attrs, as)
+ diff_fn = if Gitlab::Database.postgresql?
+ Arel::Nodes::Subtraction.new(
+ Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs)),
+ Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs)))
+ elsif Gitlab::Database.mysql?
+ Arel::Nodes::NamedFunction.new(
+ "TIMESTAMPDIFF",
+ [Arel.sql('second'),
+ Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs)),
+ Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs))])
+ end
+
+ query_so_far.project(diff_fn.as(as))
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb
new file mode 100644
index 00000000000..1444d25ebc7
--- /dev/null
+++ b/lib/gitlab/database/median.rb
@@ -0,0 +1,112 @@
+# https://www.periscopedata.com/blog/medians-in-sql.html
+module Gitlab
+ module Database
+ module Median
+ def median_datetime(arel_table, query_so_far, column_sym)
+ median_queries =
+ if Gitlab::Database.postgresql?
+ pg_median_datetime_sql(arel_table, query_so_far, column_sym)
+ elsif Gitlab::Database.mysql?
+ mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
+ end
+
+ results = Array.wrap(median_queries).map do |query|
+ ActiveRecord::Base.connection.execute(query)
+ end
+ extract_median(results).presence
+ end
+
+ def extract_median(results)
+ result = results.compact.first
+
+ if Gitlab::Database.postgresql?
+ result = result.first.presence
+ median = result['median'] if result
+ median.to_f if median
+ elsif Gitlab::Database.mysql?
+ result.to_a.flatten.first
+ end
+ end
+
+ def mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
+ query = arel_table.
+ from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)).
+ project(average([arel_table[column_sym]], 'median')).
+ where(
+ Arel::Nodes::Between.new(
+ Arel.sql("(select @row_id := @row_id + 1)"),
+ Arel::Nodes::And.new(
+ [Arel.sql('@ct/2.0'),
+ Arel.sql('@ct/2.0 + 1')]
+ )
+ )
+ ).
+ # Disallow negative values
+ where(arel_table[column_sym].gteq(0))
+
+ [
+ Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"),
+ Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"),
+ Arel.sql("set @row_id := 0;"),
+ query.to_sql,
+ Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")
+ ]
+ end
+
+ def pg_median_datetime_sql(arel_table, query_so_far, column_sym)
+ # Create a CTE with the column we're operating on, row number (after sorting by the column
+ # we're operating on), and count of the table we're operating on (duplicated across) all rows
+ # of the CTE. For example, if we're looking to find the median of the `projects.star_count`
+ # column, the CTE might look like this:
+ #
+ # star_count | row_id | ct
+ # ------------+--------+----
+ # 5 | 1 | 3
+ # 9 | 2 | 3
+ # 15 | 3 | 3
+ cte_table = Arel::Table.new("ordered_records")
+ cte = Arel::Nodes::As.new(
+ cte_table,
+ arel_table.
+ project(
+ arel_table[column_sym].as(column_sym.to_s),
+ Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []),
+ Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'),
+ arel_table.project("COUNT(1)").as('ct')).
+ # Disallow negative values
+ where(arel_table[column_sym].gteq(zero_interval)))
+
+ # From the CTE, select either the middle row or the middle two rows (this is accomplished
+ # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
+ # selected rows, and this is the median value.
+ cte_table.project(average([extract_epoch(cte_table[column_sym])], "median")).
+ where(
+ Arel::Nodes::Between.new(
+ cte_table[:row_id],
+ Arel::Nodes::And.new(
+ [(cte_table[:ct] / Arel.sql('2.0')),
+ (cte_table[:ct] / Arel.sql('2.0') + 1)]
+ )
+ )
+ ).
+ with(query_so_far, cte).
+ to_sql
+ end
+
+ private
+
+ def average(args, as)
+ Arel::Nodes::NamedFunction.new("AVG", args, as)
+ end
+
+ def extract_epoch(arel_attribute)
+ Arel.sql(%Q{EXTRACT(EPOCH FROM "#{arel_attribute.relation.name}"."#{arel_attribute.name}")})
+ end
+
+ # Need to cast '0' to an INTERVAL before we can check if the interval is positive
+ def zero_interval
+ Arel::Nodes::NamedFunction.new("CAST", [Arel.sql("'0' AS INTERVAL")])
+ end
+ end
+ end
+end