diff options
author | Timothy Andrew <mail@timothyandrew.net> | 2016-09-20 12:33:37 +0530 |
---|---|---|
committer | Timothy Andrew <mail@timothyandrew.net> | 2016-09-20 13:23:14 +0530 |
commit | 4ff8d5d28d402284ad365630d9c85b9ae4479b04 (patch) | |
tree | 0c3ee217abde05753700e11adfa8689a62486a63 | |
parent | b43d3af782c52b060205e63720857bbf462d6b6e (diff) | |
download | gitlab-ce-4ff8d5d28d402284ad365630d9c85b9ae4479b04.tar.gz |
Implement a database median strategy for MySQL.
1. Dispatch between the two strategies automatically based on the
current database type.
2. The MySQL version needs to run multiple statements, so the
`cycle_analytics` model is modified to support this.
-rw-r--r-- | app/models/concerns/database_median.rb | 44 | ||||
-rw-r--r-- | app/models/cycle_analytics.rb | 55 |
2 files changed, 76 insertions, 23 deletions
diff --git a/app/models/concerns/database_median.rb b/app/models/concerns/database_median.rb index 5af1ca772ae..19a495a05b8 100644 --- a/app/models/concerns/database_median.rb +++ b/app/models/concerns/database_median.rb @@ -1,13 +1,35 @@ +# https://www.periscopedata.com/blog/medians-in-sql.html module DatabaseMedian extend ActiveSupport::Concern def median_datetime(arel_table, query_so_far, column_sym) - # TODO: MySQL - pg_median_datetime(arel_table, query_so_far, column_sym) + case ActiveRecord::Base.connection.adapter_name + when 'PostgreSQL' + pg_median_datetime(arel_table, query_so_far, column_sym) + when 'Mysql2' + mysql_median_datetime(arel_table, query_so_far, column_sym) + else + raise NotImplementedError, "We haven't implemented a database median strategy for your database type." + end end + def mysql_median_datetime(arel_table, query_so_far, column_sym) + query = arel_table. + from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)). + project(Arel::Nodes::NamedFunction.new("AVG", [arel_table[column_sym]]).as('median')). + where(Arel::Nodes::Between.new(Arel.sql("(select @row_id := @row_id + 1)"), + Arel::Nodes::And.new([Arel.sql('@ct/2.0'), + Arel.sql('@ct/2.0 + 1')]))). + # Disallow negative values + where(arel_table[column_sym].gteq(0)) + + [Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"), + Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"), + Arel.sql("set @row_id := 0;"), + query, + Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")] + end - # https://www.periscopedata.com/blog/medians-in-sql.html def pg_median_datetime(arel_table, query_so_far, column_sym) # Create a CTE with the column we're operating on, row number (after sorting by the column # we're operating on), and count of the table we're operating on (duplicated across) all rows @@ -19,13 +41,13 @@ module DatabaseMedian # 5 | 1 | 3 # 9 | 2 | 3 # 15 | 3 | 3 - cte_table = Arel::Table.new(("ordered_records")) + cte_table = Arel::Table.new("ordered_records") cte = Arel::Nodes::As.new(cte_table, arel_table. project(arel_table[column_sym].as(column_sym.to_s), - Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), - Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), - arel_table.project("COUNT(1)").as('ct')). + Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), + Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), + arel_table.project("COUNT(1)").as('ct')). # Disallow negative values where(arel_table[column_sym].gteq(zero_interval))) @@ -36,19 +58,19 @@ module DatabaseMedian [extract_epoch(cte_table[column_sym])], "median")). where(Arel::Nodes::Between.new(cte_table[:row_id], - Arel::Nodes::And.new([(cte_table[:ct] / Arel::Nodes::SqlLiteral.new('2.0')), - (cte_table[:ct] / Arel::Nodes::SqlLiteral.new('2.0') + 1)]))). + Arel::Nodes::And.new([(cte_table[:ct] / Arel.sql('2.0')), + (cte_table[:ct] / Arel.sql('2.0') + 1)]))). with(query_so_far, cte) end private def extract_epoch(arel_attribute) - Arel::Nodes::SqlLiteral.new("EXTRACT(EPOCH FROM \"#{arel_attribute.relation.name}\".\"#{arel_attribute.name}\")") + Arel.sql("EXTRACT(EPOCH FROM \"#{arel_attribute.relation.name}\".\"#{arel_attribute.name}\")") end # Need to cast '0' to an INTERVAL before we can check if the interval is positive def zero_interval - Arel::Nodes::NamedFunction.new("CAST", [Arel::Nodes::SqlLiteral.new("'0' AS INTERVAL")]) + Arel::Nodes::NamedFunction.new("CAST", [Arel.sql("'0' AS INTERVAL")]) end end diff --git a/app/models/cycle_analytics.rb b/app/models/cycle_analytics.rb index 561ddd258db..ef3f1bbcab5 100644 --- a/app/models/cycle_analytics.rb +++ b/app/models/cycle_analytics.rb @@ -61,18 +61,10 @@ class CycleAnalytics cte_table = Arel::Table.new("cte_table_for_#{name}") # Add a `SELECT` for (end_time - start-time), and add an alias for it. - # Note: We use COALESCE to pick up the first non-null column for end_time / start_time. - query = Arel::Nodes::As.new( - cte_table, - base_query.project( - Arel::Nodes::Subtraction.new( - Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs)), - Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs)) - ).as(name.to_s))) - - query = median_datetime(cte_table, query, name) - median = ActiveRecord::Base.connection.execute(query.to_sql).first['median'] - median.to_f if median.present? + query = Arel::Nodes::As.new(cte_table, subtract_datetimes(base_query, end_time_attrs, start_time_attrs, name.to_s)) + queries = Array.wrap(median_datetime(cte_table, query, name)) + results = queries.map { |query| run_query(query) } + extract_median(results).presence end # Join table with a row for every <issue,merge_request> pair (where the merge request @@ -96,4 +88,43 @@ class CycleAnalytics # Limit to merge requests that have been deployed to production after `@from` query.where(TableReferences.merge_request_metrics[:first_deployed_to_production_at].gteq(@from)) end + + # Note: We use COALESCE to pick up the first non-null column for end_time / start_time. + def subtract_datetimes(query_so_far, end_time_attrs, start_time_attrs, as) + diff_fn = case ActiveRecord::Base.connection.adapter_name + when 'PostgreSQL' + Arel::Nodes::Subtraction.new( + Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs)), + Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs))) + when 'Mysql2' + Arel::Nodes::NamedFunction.new( + "TIMESTAMPDIFF", + [Arel.sql('second'), + Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs)), + Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs))]) + else + raise NotImplementedError, "Cycle analytics doesn't support your database type." + end + + query_so_far.project(diff_fn.as(as)) + end + + def run_query(query) + if query.is_a? String + ActiveRecord::Base.connection.execute query + else + ActiveRecord::Base.connection.execute query.to_sql + end + end + + def extract_median(results) + result = results.compact.first + + case ActiveRecord::Base.connection.adapter_name + when 'PostgreSQL' + result.first['median'].to_f + when 'Mysql2' + result.to_a.flatten.first + end + end end |