diff options
author | Stan Hu <stanhu@gmail.com> | 2016-09-21 05:05:02 +0000 |
---|---|---|
committer | Stan Hu <stanhu@gmail.com> | 2016-09-21 05:05:02 +0000 |
commit | 5416ab8a0df000bfa9f853840d44d992a975db83 (patch) | |
tree | 0fc99b8b83235de0db18b967aef9ec1feacfe1f3 /lib | |
parent | 0c7f38bd5b59458a94a9637e06287c8bbbaec82d (diff) | |
parent | 244ec0a84c969454bfa05f66dedb22f2b1172323 (diff) | |
download | gitlab-ce-5416ab8a0df000bfa9f853840d44d992a975db83.tar.gz |
Merge branch '21170-cycle-analytics' into 'master'
Cycle Analytics: first iteration
## What does this MR do?
- Implement the first iteration of the "Cycle Analytics" feature.
## What are the relevant issue numbers?
- Closes #21170
## Screenshots
![cycle_analytics_screencast.gif](/uploads/d23c3c912caa6935fd47b53ca3a56b97/cycle_analytics.gif)
## Backend Tasks
- [x] Implementation
- [x] Phases
- [x] Issue (Tracker)
- [x] Plan (Board)
- [x] Code (IDE)
- [x] Test (CI)
- [x] Review (MR)
- [x] Staging (CD)
- [x] Production (Total)
- [x] Make heuristics more modular
- [x] Scope to project
- [x] Date range (30 days, 90 days)
- [x] Access restriction
- [x] Test
- [x] Find a better way to test these phases
- [x] Phases
- [x] Issue (Tracker)
- [x] Plan (Board)
- [x] Code (IDE)
- [x] Test (CI)
- [x] Review (MR)
- [x] Staging (CD)
- [x] Production (Total)
- [x] Test for "end case happens before start case"
- [x] Consolidate helper
- [x] Miniboss review
- [x] Performance testing with mock data
- [x] Improve performance
- [x] Pre-calculate "merge requests closing issues"
- [x] Pre-calculate everything else
- [x] Test performance against 10k issues
- [x] Test all pre-calculation code
- [x] Ci::Pipeline -> build start/finish
- [x] Ci::Pipeline#merge_requests
- [x] Issue -> record default metrics after save
- [x] MergeRequest -> record default metrics after save
- [x] Deployment -> Update "first_deployed_to_production_at" for MR metrics
- [x] Git Push -> Update "first commit mention" for issue metrics
- [x] Merge request create/update/refresh -> Update "merge requests closing issues"
- [x] Remove `MergeRequestsClosingIssues` when necessary
- [x] Changes to unblock Fatih
- [x] Add summary data
- [x] `stats` should be array
- [x] Let `stats` be `null` if all `stats` are null
- [x] Indexes for "merge requests closing issues"
- [x] Test summary data
- [x] Scope everything to project
- [x] Find out why tests were passing
- [x] Filter should include issues/MRs which have made it to production within the range
- [x] Don't create duplicate `MergeRequestsClosingIssues`
- [x] Fix tests
- [x] MySQL median
- [x] Assign to Douwe for review
- [x] Fix conflicts
- [x] Implement suggestions from Yorick's review
- [x] Test on PG
- [x] Test on MySQL
- [x] Refactor
- [x] Cleanup
- [x] What happens if we have no data at all?
- [x] Extract common queries to methods / scopes
- [x] Remove unused queries
- [x] Downtime for foreign key migrations
- [x] Find a way around "if issue.metrics.present?" all over the place
- [x] Find a way around "if merge_request.metrics.present?" all over the place
- [x] Test migrations on a fresh database
- [x] MySQL
- [x] Pg
- [x] Access issues
- While the project is public and the visibility is set to "Everyone with access", you cannot visit the cycle analytics page when signed out.
- [x] CHANGELOG
- [x] Implement suggestions from Douwe's review
- [x] First set of comments
- [x] Second set of comments
- [x] Third set of comments
- [x] Fourth set of comments
- [x] Make sure build is green
- [ ] Make issue for "polish"
- [ ] EE MR
See merge request !5986
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/database/date_time.rb | 27 | ||||
-rw-r--r-- | lib/gitlab/database/median.rb | 112 |
2 files changed, 139 insertions, 0 deletions
diff --git a/lib/gitlab/database/date_time.rb b/lib/gitlab/database/date_time.rb
new file mode 100644
index 00000000000..b6a89f715fd
--- /dev/null
+++ b/lib/gitlab/database/date_time.rb
@@ -0,0 +1,27 @@
+module Gitlab
+  module Database
+    module DateTime
+      # Find the first of the `end_time_attrs` that isn't `NULL`. Subtract from it
+      # the first of the `start_time_attrs` that isn't `NULL`. `SELECT` the resulting
+      # interval under the alias specified by the `as` parameter.
+      #
+      # query_so_far     - Arel query that the projection is appended to.
+      # end_time_attrs   - a single attribute or an array of attributes, handed to `COALESCE`.
+      # start_time_attrs - a single attribute or an array of attributes, handed to `COALESCE`.
+      # as               - alias given to the projected difference.
+      #
+      # Note: For MySQL, the interval is returned in seconds.
+      #       For PostgreSQL, the interval is returned as an INTERVAL type.
+      def subtract_datetimes(query_so_far, end_time_attrs, start_time_attrs, as)
+        diff_fn = if Gitlab::Database.postgresql?
+                    Arel::Nodes::Subtraction.new(
+                      Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs)),
+                      Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs)))
+                  elsif Gitlab::Database.mysql?
+                    # TIMESTAMPDIFF(unit, a, b) evaluates to `b - a`, so the start time
+                    # is passed before the end time.
+                    Arel::Nodes::NamedFunction.new(
+                      "TIMESTAMPDIFF",
+                      [Arel.sql('second'),
+                       Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs)),
+                       Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs))])
+                  end
+
+        query_so_far.project(diff_fn.as(as))
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb
new file mode 100644
index 00000000000..1444d25ebc7
--- /dev/null
+++ b/lib/gitlab/database/median.rb
@@ -0,0 +1,112 @@
+# https://www.periscopedata.com/blog/medians-in-sql.html
+module Gitlab
+  module Database
+    module Median
+      # Compute the median of `column_sym` over the relation defined by `query_so_far`.
+      #
+      # arel_table   - Arel table naming the relation that `query_so_far` defines.
+      # query_so_far - Arel node defining that relation. It is attached as a CTE on
+      #                PostgreSQL and used as the body of a temporary table on MySQL.
+      # column_sym   - name of the column to take the median of.
+      #
+      # Returns whatever `extract_median` yields, or `nil` when that value is blank.
+      def median_datetime(arel_table, query_so_far, column_sym)
+        median_queries =
+          if Gitlab::Database.postgresql?
+            pg_median_datetime_sql(arel_table, query_so_far, column_sym)
+          elsif Gitlab::Database.mysql?
+            mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
+          end
+
+        # PostgreSQL produces a single statement, MySQL a list of statements;
+        # `Array.wrap` lets both be executed the same way.
+        results = Array.wrap(median_queries).map do |query|
+          ActiveRecord::Base.connection.execute(query)
+        end
+        extract_median(results).presence
+      end
+
+      # Pull the median out of the raw results of the executed statements.
+      #
+      # results - array with one entry per executed statement; `nil` entries are skipped
+      #           and only the first remaining entry is inspected.
+      #
+      # Returns a Float on PostgreSQL, the first value of the first row on MySQL,
+      # or `nil` when there is nothing to extract.
+      def extract_median(results)
+        result = results.compact.first
+        # Nothing was returned by any statement, so there is no median to report.
+        return unless result
+
+        if Gitlab::Database.postgresql?
+          row = result.first.presence
+          median = row['median'] if row
+          median.to_f if median
+        elsif Gitlab::Database.mysql?
+          result.to_a.flatten.first
+        end
+      end
+
+      # Build the list of MySQL statements that compute the median: create a temporary
+      # table from `query_so_far`, initialise the `@ct` and `@row_id` user variables,
+      # run the median query, then drop the temporary table.
+      #
+      # Returns an array of SQL strings, to be executed in order.
+      #
+      # NOTE(review): `@ct` counts every row of the temporary table, while the median
+      # query also excludes negative values. Confirm whether the count should be
+      # restricted in the same way.
+      def mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
+        query = arel_table.
+                from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)).
+                project(average([arel_table[column_sym]], 'median')).
+                where(
+                  Arel::Nodes::Between.new(
+                    Arel.sql("(select @row_id := @row_id + 1)"),
+                    Arel::Nodes::And.new(
+                      [Arel.sql('@ct/2.0'),
+                       Arel.sql('@ct/2.0 + 1')]
+                    )
+                  )
+                ).
+                # Disallow negative values
+                where(arel_table[column_sym].gteq(0))
+
+        [
+          Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"),
+          Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"),
+          Arel.sql("set @row_id := 0;"),
+          query.to_sql,
+          Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")
+        ]
+      end
+
+      # Build the single PostgreSQL statement that computes the median.
+      #
+      # Returns a SQL string.
+      def pg_median_datetime_sql(arel_table, query_so_far, column_sym)
+        # Negative values are excluded from the median. The same condition is applied to
+        # both the numbered rows and the row count, so that `ct` always equals the largest
+        # `row_id`.
+        non_negative = arel_table[column_sym].gteq(zero_interval)
+
+        # Create a CTE with the column we're operating on, row number (after sorting by the column
+        # we're operating on), and count of the rows we're operating on (duplicated across all rows
+        # of the CTE). For example, if we're looking to find the median of the `projects.star_count`
+        # column, the CTE might look like this:
+        #
+        #  star_count | row_id | ct
+        # ------------+--------+----
+        #           5 |      1 |  3
+        #           9 |      2 |  3
+        #          15 |      3 |  3
+        cte_table = Arel::Table.new("ordered_records")
+        cte = Arel::Nodes::As.new(
+          cte_table,
+          arel_table.
+            project(
+              arel_table[column_sym].as(column_sym.to_s),
+              Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []),
+                                    Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'),
+              arel_table.where(non_negative).project("COUNT(1)").as('ct')).
+            # Disallow negative values
+            where(non_negative))
+
+        # From the CTE, select either the middle row or the middle two rows (this is accomplished
+        # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
+        # selected rows, and this is the median value.
+        cte_table.project(average([extract_epoch(cte_table[column_sym])], "median")).
+          where(
+            Arel::Nodes::Between.new(
+              cte_table[:row_id],
+              Arel::Nodes::And.new(
+                [(cte_table[:ct] / Arel.sql('2.0')),
+                 (cte_table[:ct] / Arel.sql('2.0') + 1)]
+              )
+            )
+          ).
+          with(query_so_far, cte).
+          to_sql
+      end
+
+      private
+
+      # Wrap `args` in an `AVG(...)` call aliased as `as`.
+      def average(args, as)
+        Arel::Nodes::NamedFunction.new("AVG", args, as)
+      end
+
+      # SQL fragment extracting the epoch from the given attribute, so that it can be averaged.
+      def extract_epoch(arel_attribute)
+        Arel.sql(%Q{EXTRACT(EPOCH FROM "#{arel_attribute.relation.name}"."#{arel_attribute.name}")})
+      end
+
+      # Need to cast '0' to an INTERVAL before we can check if the interval is positive
+      def zero_interval
+        Arel::Nodes::NamedFunction.new("CAST", [Arel.sql("'0' AS INTERVAL")])
+      end
+    end
+  end
+end