From f11030173b909c64aa23ce7482af8fcee8554be9 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Tue, 12 Mar 2019 09:51:37 +0000 Subject: Batch insert CI rspec_profiling data Instead of inserting a row after each example to an external database, we save the CI profiling reports into the `rspec_profiling` directory and insert the data in the update-tests-metadata CI stage. This should make each spec run faster and also reduce the number of PostgreSQL connections needed by concurrent CI builds. `scripts/insert-rspec-profiling-data` also inserts one file at a time via the PostgreSQL COPY command for faster inserts. The one side effect is that the `created_at` and `updated_at` timestamps aren't available since they aren't generated in the CSV. Closes https://gitlab.com/gitlab-org/gitlab-ee/issues/10154 --- .gitlab-ci.yml | 6 +++- config/initializers/rspec_profiling.rb | 50 ++++++++++++++++++++++++---------- scripts/insert-rspec-profiling-data | 47 ++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 16 deletions(-) create mode 100755 scripts/insert-rspec-profiling-data diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8ea1f082787..144063f208f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -66,6 +66,7 @@ stages: paths: - knapsack/ - rspec_flaky/ + - rspec_profiling/ .use-pg: &use-pg services: @@ -159,6 +160,7 @@ stages: - coverage/ - knapsack/ - rspec_flaky/ + - rspec_profiling/ - tmp/capybara/ reports: junit: junit_rspec.xml @@ -336,6 +338,7 @@ retrieve-tests-metadata: - wget -O $KNAPSACK_RSPEC_SUITE_REPORT_PATH http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/$KNAPSACK_RSPEC_SUITE_REPORT_PATH || rm $KNAPSACK_RSPEC_SUITE_REPORT_PATH - '[[ -f $KNAPSACK_RSPEC_SUITE_REPORT_PATH ]] || echo "{}" > ${KNAPSACK_RSPEC_SUITE_REPORT_PATH}' - mkdir -p rspec_flaky/ + - mkdir -p rspec_profiling/ - wget -O $FLAKY_RSPEC_SUITE_REPORT_PATH http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/$FLAKY_RSPEC_SUITE_REPORT_PATH || rm $FLAKY_RSPEC_SUITE_REPORT_PATH - '[[ -f $FLAKY_RSPEC_SUITE_REPORT_PATH ]] || echo "{}" > ${FLAKY_RSPEC_SUITE_REPORT_PATH}' @@ -350,7 +353,7 @@ update-tests-metadata: - rspec_flaky/ policy: push script: - - retry gem install fog-aws mime-types activesupport --no-document + - retry gem install fog-aws mime-types activesupport rspec_profiling postgres-copy --no-document - scripts/merge-reports ${KNAPSACK_RSPEC_SUITE_REPORT_PATH} knapsack/${CI_PROJECT_NAME}/rspec-pg_node_*.json - scripts/merge-reports ${FLAKY_RSPEC_SUITE_REPORT_PATH} rspec_flaky/all_*_*.json - FLAKY_RSPEC_GENERATE_REPORT=1 scripts/prune-old-flaky-specs ${FLAKY_RSPEC_SUITE_REPORT_PATH} @@ -358,6 +361,7 @@ update-tests-metadata: - '[[ -z ${TESTS_METADATA_S3_BUCKET} ]] || scripts/sync-reports put $TESTS_METADATA_S3_BUCKET $FLAKY_RSPEC_SUITE_REPORT_PATH' - rm -f knapsack/${CI_PROJECT_NAME}/*_node_*.json - rm -f rspec_flaky/all_*.json rspec_flaky/new_*.json + - scripts/insert-rspec-profiling-data flaky-examples-check: <<: *dedicated-runner diff --git a/config/initializers/rspec_profiling.rb b/config/initializers/rspec_profiling.rb index 2de310753a9..715e17057e0 100644 --- a/config/initializers/rspec_profiling.rb +++ b/config/initializers/rspec_profiling.rb @@ -1,7 +1,28 @@ +# frozen_string_literal: true + +return unless Rails.env.test? + module RspecProfilingExt - module PSQL - def establish_connection - ::RspecProfiling::Collectors::PSQL::Result.establish_connection(ENV['RSPEC_PROFILING_POSTGRES_URL']) + module Collectors + class CSVWithTimestamps < ::RspecProfiling::Collectors::CSV + TIMESTAMP_FIELDS = %w(created_at updated_at).freeze + HEADERS = (::RspecProfiling::Collectors::CSV::HEADERS + TIMESTAMP_FIELDS).freeze + + def insert(attributes) + output << HEADERS.map do |field| + if TIMESTAMP_FIELDS.include?(field) + Time.now + else + attributes.fetch(field.to_sym) + end + end + end + + private + + def output + @output ||= ::CSV.open(path, "w").tap { |csv| csv << HEADERS } + end end end @@ -10,9 +31,13 @@ module RspecProfilingExt if ENV['CI_COMMIT_REF_NAME'] "#{defined?(Gitlab::License) ? 'ee' : 'ce'}:#{ENV['CI_COMMIT_REF_NAME']}" else - super + super&.chomp end end + + def sha + super&.chomp + end end module Run @@ -30,16 +55,11 @@ module RspecProfilingExt end end -if Rails.env.test? - RspecProfiling.configure do |config| - if ENV['RSPEC_PROFILING_POSTGRES_URL'].present? - RspecProfiling::Collectors::PSQL.prepend(RspecProfilingExt::PSQL) - config.collector = RspecProfiling::Collectors::PSQL - end - - if ENV.key?('CI') - RspecProfiling::VCS::Git.prepend(RspecProfilingExt::Git) - RspecProfiling::Run.prepend(RspecProfilingExt::Run) - end +RspecProfiling.configure do |config| + if ENV.key?('CI') || ENV.key?('RSPEC_PROFILING') + RspecProfiling::VCS::Git.prepend(RspecProfilingExt::Git) + RspecProfiling::Run.prepend(RspecProfilingExt::Run) + config.collector = RspecProfilingExt::Collectors::CSVWithTimestamps + config.csv_path = -> { "rspec_profiling/#{Time.now.to_i}-#{SecureRandom.hex(8)}-rspec-data.csv" } end end diff --git a/scripts/insert-rspec-profiling-data b/scripts/insert-rspec-profiling-data new file mode 100755 index 00000000000..10e337b9972 --- /dev/null +++ b/scripts/insert-rspec-profiling-data @@ -0,0 +1,47 @@ +#!/usr/bin/env ruby + +require 'csv' +require 'rspec_profiling' +require 'postgres-copy' + +module RspecProfiling + module Collectors + class PSQL + def establish_connection + # This disables the automatic creation of the database and + # table. In the future, we may want a way to specify the host of + # the database to connect so that we can call #install. + Result.establish_connection(results_url) + end + + def prepared? + connection.data_source_exists?(table) + end + + def results_url + ENV['RSPEC_PROFILING_POSTGRES_URL'] + end + + class Result < ActiveRecord::Base + acts_as_copy_target + end + end + end +end + +def insert_data(path) + puts "#{Time.now} Inserting CI stats..." + + collector = RspecProfiling::Collectors::PSQL.new + collector.install + + files = Dir[File.join(path, "*.csv")] + + files.each do |filename| + puts "#{Time.now} Inserting #{filename}..." + result = RspecProfiling::Collectors::PSQL::Result.copy_from(filename) + puts "#{Time.now} Inserted #{result.cmd_tuples} lines in #{filename}, DB response: #{result.cmd_status}" + end +end + +insert_data('rspec_profiling') if ENV['RSPEC_PROFILING_POSTGRES_URL'].present? -- cgit v1.2.1