From 8957293d9bd0d711db3af26182205c2fe4125194 Mon Sep 17 00:00:00 2001 From: Timothy Andrew Date: Tue, 20 Sep 2016 15:06:54 +0530 Subject: Implement review comments from @yorickpeterse 1. Change multiple updates to a single `update_all` 2. Use cascading deletes 3. Extract an average function for the database median. 4. Move database median to `lib/gitlab/database` 5. Use `delete_all` instead of `destroy_all` 6. Minor refactoring --- lib/gitlab/database/median.rb | 80 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 lib/gitlab/database/median.rb (limited to 'lib') diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb new file mode 100644 index 00000000000..3ede0c6acd9 --- /dev/null +++ b/lib/gitlab/database/median.rb @@ -0,0 +1,80 @@ +# https://www.periscopedata.com/blog/medians-in-sql.html +module Gitlab + module Database + module Median + def median_datetime(arel_table, query_so_far, column_sym) + case ActiveRecord::Base.connection.adapter_name + when 'PostgreSQL' + pg_median_datetime(arel_table, query_so_far, column_sym) + when 'Mysql2' + mysql_median_datetime(arel_table, query_so_far, column_sym) + else + raise NotImplementedError, "We haven't implemented a database median strategy for your database type." + end + end + + def mysql_median_datetime(arel_table, query_so_far, column_sym) + query = arel_table. + from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)). + project(average([arel_table[column_sym]], 'median')). + where(Arel::Nodes::Between.new(Arel.sql("(select @row_id := @row_id + 1)"), + Arel::Nodes::And.new([Arel.sql('@ct/2.0'), + Arel.sql('@ct/2.0 + 1')]))). + # Disallow negative values + where(arel_table[column_sym].gteq(0)) + + [Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"), + Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"), + Arel.sql("set @row_id := 0;"), + query, + Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")] + end + + def pg_median_datetime(arel_table, query_so_far, column_sym) + # Create a CTE with the column we're operating on, row number (after sorting by the column + # we're operating on), and count of the table we're operating on (duplicated across) all rows + # of the CTE. For example, if we're looking to find the median of the `projects.star_count` + # column, the CTE might look like this: + # + # star_count | row_id | ct + # ------------+--------+---- + # 5 | 1 | 3 + # 9 | 2 | 3 + # 15 | 3 | 3 + cte_table = Arel::Table.new("ordered_records") + cte = Arel::Nodes::As.new(cte_table, + arel_table. + project(arel_table[column_sym].as(column_sym.to_s), + Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), + Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), + arel_table.project("COUNT(1)").as('ct')). + # Disallow negative values + where(arel_table[column_sym].gteq(zero_interval))) + + # From the CTE, select either the middle row or the middle two rows (this is accomplished + # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the + # selected rows, and this is the median value. + cte_table.project(average([extract_epoch(cte_table[column_sym])], "median")). + where(Arel::Nodes::Between.new(cte_table[:row_id], + Arel::Nodes::And.new([(cte_table[:ct] / Arel.sql('2.0')), + (cte_table[:ct] / Arel.sql('2.0') + 1)]))). + with(query_so_far, cte) + end + + private + + def average(args, as) + Arel::Nodes::NamedFunction.new("AVG", args, as) + end + + def extract_epoch(arel_attribute) + Arel.sql(%Q{EXTRACT(EPOCH FROM "#{arel_attribute.relation.name}"."#{arel_attribute.name}")}) + end + + # Need to cast '0' to an INTERVAL before we can check if the interval is positive + def zero_interval + Arel::Nodes::NamedFunction.new("CAST", [Arel.sql("'0' AS INTERVAL")]) + end + end + end +end -- cgit v1.2.1 From 71d4bf721be6cd57ab3480bc5efb11a91d6e1891 Mon Sep 17 00:00:00 2001 From: Timothy Andrew Date: Tue, 20 Sep 2016 18:13:11 +0530 Subject: Implement (some) comments from @DouweM's review. - Move things common to `Issue` and `MergeRequest` into `Issuable` - Move more database-specific functions into `Gitlab::Database` - Indentation changes and other minor refactorings. --- lib/gitlab/database/date_time.rb | 27 +++++++++++++++++++ lib/gitlab/database/median.rb | 58 +++++++++++++++++++++++++--------------- 2 files changed, 64 insertions(+), 21 deletions(-) create mode 100644 lib/gitlab/database/date_time.rb (limited to 'lib') diff --git a/lib/gitlab/database/date_time.rb b/lib/gitlab/database/date_time.rb new file mode 100644 index 00000000000..b6a89f715fd --- /dev/null +++ b/lib/gitlab/database/date_time.rb @@ -0,0 +1,27 @@ +module Gitlab + module Database + module DateTime + # Find the first of the `end_time_attrs` that isn't `NULL`. Subtract from it + # the first of the `start_time_attrs` that isn't NULL. `SELECT` the resulting interval + # along with an alias specified by the `as` parameter. + # + # Note: For MySQL, the interval is returned in seconds. + # For PostgreSQL, the interval is returned as an INTERVAL type. + def subtract_datetimes(query_so_far, end_time_attrs, start_time_attrs, as) + diff_fn = if Gitlab::Database.postgresql? + Arel::Nodes::Subtraction.new( + Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs)), + Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs))) + elsif Gitlab::Database.mysql? + Arel::Nodes::NamedFunction.new( + "TIMESTAMPDIFF", + [Arel.sql('second'), + Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs)), + Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs))]) + end + + query_so_far.project(diff_fn.as(as)) + end + end + end +end diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb index 3ede0c6acd9..21b4c043655 100644 --- a/lib/gitlab/database/median.rb +++ b/lib/gitlab/database/median.rb @@ -3,13 +3,22 @@ module Gitlab module Database module Median def median_datetime(arel_table, query_so_far, column_sym) - case ActiveRecord::Base.connection.adapter_name - when 'PostgreSQL' + if Gitlab::Database.postgresql? pg_median_datetime(arel_table, query_so_far, column_sym) - when 'Mysql2' + elsif Gitlab::Database.mysql? mysql_median_datetime(arel_table, query_so_far, column_sym) - else - raise NotImplementedError, "We haven't implemented a database median strategy for your database type." + end + end + + def extract_median(results) + result = results.compact.first + + if Gitlab::Database.postgresql? + result = result.first.presence + median = result['median'] if result + median.to_f if median + elsif Gitlab::Database.mysql? + result.to_a.flatten.first end end @@ -17,17 +26,23 @@ module Gitlab query = arel_table. from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)). project(average([arel_table[column_sym]], 'median')). - where(Arel::Nodes::Between.new(Arel.sql("(select @row_id := @row_id + 1)"), - Arel::Nodes::And.new([Arel.sql('@ct/2.0'), - Arel.sql('@ct/2.0 + 1')]))). + where(Arel::Nodes::Between.new( + Arel.sql("(select @row_id := @row_id + 1)"), + Arel::Nodes::And.new( + [Arel.sql('@ct/2.0'), + Arel.sql('@ct/2.0 + 1')] + ) + )). # Disallow negative values where(arel_table[column_sym].gteq(0)) - [Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"), - Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"), - Arel.sql("set @row_id := 0;"), - query, - Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")] + [ + Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"), + Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"), + Arel.sql("set @row_id := 0;"), + query, + Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};") + ] end def pg_median_datetime(arel_table, query_so_far, column_sym) @@ -42,14 +57,15 @@ module Gitlab # 9 | 2 | 3 # 15 | 3 | 3 cte_table = Arel::Table.new("ordered_records") - cte = Arel::Nodes::As.new(cte_table, - arel_table. - project(arel_table[column_sym].as(column_sym.to_s), - Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), - Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), - arel_table.project("COUNT(1)").as('ct')). - # Disallow negative values - where(arel_table[column_sym].gteq(zero_interval))) + cte = Arel::Nodes::As.new( + cte_table, + arel_table. + project(arel_table[column_sym].as(column_sym.to_s), + Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), + Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), + arel_table.project("COUNT(1)").as('ct')). + # Disallow negative values + where(arel_table[column_sym].gteq(zero_interval))) # From the CTE, select either the middle row or the middle two rows (this is accomplished # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the -- cgit v1.2.1 From 231a9f5b8788aa0bbb2fa304f8b937f7c09fcee1 Mon Sep 17 00:00:00 2001 From: Timothy Andrew Date: Tue, 20 Sep 2016 18:21:59 +0530 Subject: Fix rubocop spec. And `scss_lint` --- lib/gitlab/database/median.rb | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb index 21b4c043655..94f060bd2c7 100644 --- a/lib/gitlab/database/median.rb +++ b/lib/gitlab/database/median.rb @@ -27,12 +27,11 @@ module Gitlab from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)). project(average([arel_table[column_sym]], 'median')). where(Arel::Nodes::Between.new( - Arel.sql("(select @row_id := @row_id + 1)"), - Arel::Nodes::And.new( - [Arel.sql('@ct/2.0'), - Arel.sql('@ct/2.0 + 1')] - ) - )). + Arel.sql("(select @row_id := @row_id + 1)"), + Arel::Nodes::And.new( + [Arel.sql('@ct/2.0'), + Arel.sql('@ct/2.0 + 1')] + ))). # Disallow negative values where(arel_table[column_sym].gteq(0)) -- cgit v1.2.1 From 918e589c2b29c18d9fe3a8e6c93a3f490c86beb1 Mon Sep 17 00:00:00 2001 From: Timothy Andrew Date: Wed, 21 Sep 2016 00:47:37 +0530 Subject: Implement a second round of review comments from @DouweM. - Don't use `TableReferences` - using `.arel_table` is shorter! - Move some database-related code to `Gitlab::Database` - Remove the `MergeRequest#issues_closed` and `Issue#closed_by_merge_requests` associations. They were either shadowing or were too similar to existing methods. They are not being used anywhere, so it's better to remove them to reduce confusion. - Use Rails 3-style validations - Index for `MergeRequest::Metrics#first_deployed_to_production_at` - Only include `CycleAnalyticsHelpers::TestGeneration` for specs that need it. - Other minor refactorings. --- lib/gitlab/database/median.rb | 35 +++++++++++++++++++++-------------- lib/gitlab/database/util.rb | 12 ++++++++++++ 2 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 lib/gitlab/database/util.rb (limited to 'lib') diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb index 94f060bd2c7..9adacd64934 100644 --- a/lib/gitlab/database/median.rb +++ b/lib/gitlab/database/median.rb @@ -3,11 +3,15 @@ module Gitlab module Database module Median def median_datetime(arel_table, query_so_far, column_sym) - if Gitlab::Database.postgresql? - pg_median_datetime(arel_table, query_so_far, column_sym) - elsif Gitlab::Database.mysql? - mysql_median_datetime(arel_table, query_so_far, column_sym) - end + median_queries = + if Gitlab::Database.postgresql? + pg_median_datetime_sql(arel_table, query_so_far, column_sym) + elsif Gitlab::Database.mysql? + mysql_median_datetime_sql(arel_table, query_so_far, column_sym) + end + + results = Array.wrap(median_queries).map { |query| Util.run_query(query) } + extract_median(results).presence end def extract_median(results) @@ -22,7 +26,7 @@ module Gitlab end end - def mysql_median_datetime(arel_table, query_so_far, column_sym) + def mysql_median_datetime_sql(arel_table, query_so_far, column_sym) query = arel_table. from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)). project(average([arel_table[column_sym]], 'median')). @@ -44,7 +48,7 @@ module Gitlab ] end - def pg_median_datetime(arel_table, query_so_far, column_sym) + def pg_median_datetime_sql(arel_table, query_so_far, column_sym) # Create a CTE with the column we're operating on, row number (after sorting by the column # we're operating on), and count of the table we're operating on (duplicated across) all rows # of the CTE. For example, if we're looking to find the median of the `projects.star_count` @@ -59,10 +63,11 @@ module Gitlab cte = Arel::Nodes::As.new( cte_table, arel_table. - project(arel_table[column_sym].as(column_sym.to_s), - Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), - Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), - arel_table.project("COUNT(1)").as('ct')). + project( + arel_table[column_sym].as(column_sym.to_s), + Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []), + Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'), + arel_table.project("COUNT(1)").as('ct')). # Disallow negative values where(arel_table[column_sym].gteq(zero_interval))) @@ -70,9 +75,11 @@ module Gitlab # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the # selected rows, and this is the median value. cte_table.project(average([extract_epoch(cte_table[column_sym])], "median")). - where(Arel::Nodes::Between.new(cte_table[:row_id], - Arel::Nodes::And.new([(cte_table[:ct] / Arel.sql('2.0')), - (cte_table[:ct] / Arel.sql('2.0') + 1)]))). + where(Arel::Nodes::Between.new( + cte_table[:row_id], + Arel::Nodes::And.new( + [(cte_table[:ct] / Arel.sql('2.0')), + (cte_table[:ct] / Arel.sql('2.0') + 1)]))). with(query_so_far, cte) end diff --git a/lib/gitlab/database/util.rb b/lib/gitlab/database/util.rb new file mode 100644 index 00000000000..12b68deae89 --- /dev/null +++ b/lib/gitlab/database/util.rb @@ -0,0 +1,12 @@ +module Gitlab + module Database + module Util + class << self + def run_query(query) + query = query.to_sql unless query.is_a?(String) + ActiveRecord::Base.connection.execute(query) + end + end + end + end +end -- cgit v1.2.1 From 6df2d57394a48ac0528af722a41c41bcbc8166b2 Mon Sep 17 00:00:00 2001 From: Timothy Andrew Date: Wed, 21 Sep 2016 02:18:57 +0530 Subject: Improve indentation in `Gitlab::Database::Median` --- lib/gitlab/database/median.rb | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb index 9adacd64934..5db05b98d5a 100644 --- a/lib/gitlab/database/median.rb +++ b/lib/gitlab/database/median.rb @@ -30,12 +30,15 @@ module Gitlab query = arel_table. from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)). project(average([arel_table[column_sym]], 'median')). - where(Arel::Nodes::Between.new( - Arel.sql("(select @row_id := @row_id + 1)"), - Arel::Nodes::And.new( - [Arel.sql('@ct/2.0'), - Arel.sql('@ct/2.0 + 1')] - ))). + where( + Arel::Nodes::Between.new( + Arel.sql("(select @row_id := @row_id + 1)"), + Arel::Nodes::And.new( + [Arel.sql('@ct/2.0'), + Arel.sql('@ct/2.0 + 1')] + ) + ) + ). # Disallow negative values where(arel_table[column_sym].gteq(0)) @@ -75,11 +78,15 @@ module Gitlab # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the # selected rows, and this is the median value. cte_table.project(average([extract_epoch(cte_table[column_sym])], "median")). - where(Arel::Nodes::Between.new( - cte_table[:row_id], - Arel::Nodes::And.new( - [(cte_table[:ct] / Arel.sql('2.0')), - (cte_table[:ct] / Arel.sql('2.0') + 1)]))). + where( + Arel::Nodes::Between.new( + cte_table[:row_id], + Arel::Nodes::And.new( + [(cte_table[:ct] / Arel.sql('2.0')), + (cte_table[:ct] / Arel.sql('2.0') + 1)] + ) + ) + ). with(query_so_far, cte) end -- cgit v1.2.1 From 244ec0a84c969454bfa05f66dedb22f2b1172323 Mon Sep 17 00:00:00 2001 From: Timothy Andrew Date: Wed, 21 Sep 2016 09:57:14 +0530 Subject: Implement fourth round of comments from @DouweM. - Pluralize summary titles - Remove the `run_query` method - always return sql strings from the `date_time_sql` methods --- lib/gitlab/database/median.rb | 9 ++++++--- lib/gitlab/database/util.rb | 12 ------------ 2 files changed, 6 insertions(+), 15 deletions(-) delete mode 100644 lib/gitlab/database/util.rb (limited to 'lib') diff --git a/lib/gitlab/database/median.rb b/lib/gitlab/database/median.rb index 5db05b98d5a..1444d25ebc7 100644 --- a/lib/gitlab/database/median.rb +++ b/lib/gitlab/database/median.rb @@ -10,7 +10,9 @@ module Gitlab mysql_median_datetime_sql(arel_table, query_so_far, column_sym) end - results = Array.wrap(median_queries).map { |query| Util.run_query(query) } + results = Array.wrap(median_queries).map do |query| + ActiveRecord::Base.connection.execute(query) + end extract_median(results).presence end @@ -46,7 +48,7 @@ module Gitlab Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"), Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"), Arel.sql("set @row_id := 0;"), - query, + query.to_sql, Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};") ] end @@ -87,7 +89,8 @@ module Gitlab ) ) ). - with(query_so_far, cte) + with(query_so_far, cte). + to_sql end private diff --git a/lib/gitlab/database/util.rb b/lib/gitlab/database/util.rb deleted file mode 100644 index 12b68deae89..00000000000 --- a/lib/gitlab/database/util.rb +++ /dev/null @@ -1,12 +0,0 @@ -module Gitlab - module Database - module Util - class << self - def run_query(query) - query = query.to_sql unless query.is_a?(String) - ActiveRecord::Base.connection.execute(query) - end - end - end - end -end -- cgit v1.2.1