diff options
Diffstat (limited to 'lib/gitlab/database/median.rb')
-rw-r--r-- | lib/gitlab/database/median.rb | 23 |
1 files changed, 13 insertions, 10 deletions
diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb index 84d79cc5e19..de2cb040ad6 100644 --- a/lib/gitlab/database/median.rb +++ b/lib/gitlab/database/median.rb @@ -2,10 +2,10 @@ module Gitlab module Database module Median - def median_datetimes(arel_table, query_so_far, column_sym) + def median_datetimes(arel_table, query_so_far, column_sym, partition_column) median_queries = if Gitlab::Database.postgresql? - pg_median_datetime_sql(arel_table, query_so_far, column_sym) + pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column) elsif Gitlab::Database.mysql? mysql_median_datetime_sql(arel_table, query_so_far, column_sym) end @@ -21,7 +21,7 @@ module Gitlab if Gitlab::Database.postgresql? result.values.map do |id, median| - [id, median&.to_f] + [id.to_i, median&.to_f] end.to_h elsif Gitlab::Database.mysql? result.to_a.flatten.first @@ -53,7 +53,7 @@ module Gitlab ] end - def pg_median_datetime_sql(arel_table, query_so_far, column_sym) + def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column) # Create a CTE with the column we're operating on, row number (after sorting by the column # we're operating on), and count of the table we're operating on (duplicated across) all rows # of the CTE. For example, if we're looking to find the median of the `projects.star_count` @@ -69,19 +69,22 @@ module Gitlab cte_table, arel_table .project( - arel_table[:project_id], + arel_table[partition_column], arel_table[column_sym].as(column_sym.to_s), Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("rank", []), - Arel::Nodes::Window.new.partition(arel_table[:project_id]) + Arel::Nodes::Window.new.partition(arel_table[partition_column]) .order(arel_table[column_sym])).as('row_id'), - arel_table.from(arel_table.alias).project("COUNT(*)").where(arel_table[:project_id].eq(arel_table.alias[:project_id])).as('ct')). + arel_table.from(arel_table.alias) + .project("COUNT(*)") + .where(arel_table[partition_column].eq(arel_table.alias[partition_column])).as('ct')). # Disallow negative values where(arel_table[column_sym].gteq(zero_interval))) # From the CTE, select either the middle row or the middle two rows (this is accomplished # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the # selected rows, and this is the median value. - cte_table.project(cte_table[:project_id]) + cte_table + .project(cte_table[partition_column]) .project(average([extract_epoch(cte_table[column_sym])], "median")) .where( Arel::Nodes::Between.new( @@ -93,8 +96,8 @@ module Gitlab ) ) .with(query_so_far, cte) - .group(cte_table[:project_id]) - .order(cte_table[:project_id]) + .group(cte_table[partition_column]) + .order(cte_table[partition_column]) .to_sql end |