summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorick Peterse <yorickpeterse@gmail.com>2016-10-07 15:24:09 +0200
committerYorick Peterse <yorickpeterse@gmail.com>2016-10-10 12:27:08 +0200
commit237c8f66e6608420629503280aaea555ee980022 (patch)
treefa50e780b93cd9d812383625d987efd45a63f98b
parent434d98b22a6381447a9c59cec16fc324ade198df (diff)
downloadgitlab-ce-precalculate-trending-projects.tar.gz
Precalculate trending projectsprecalculate-trending-projects
This commit introduces a Sidekiq worker that precalculates the list of trending projects on a daily basis. The resulting set is stored in a database table that is then queried by Project.trending. This setup means that Unicorn workers no longer _may_ have to calculate the list of trending projects. Furthermore it supports filtering without any complex caching mechanisms. The data in the "trending_projects" table is inserted in the same order as the project ranking. This means that getting the projects in the correct order is simply a matter of: SELECT projects.* FROM projects INNER JOIN trending_projects ON trending_projects.project_id = projects.id ORDER BY trending_projects.id ASC; Such a query will only take a few milliseconds at most (as measured on GitLab.com), opposed to a few seconds for the query used for calculating the project ranks. The migration in this commit does not require downtime and takes care of populating an initial list of trending projects.
-rw-r--r--app/controllers/explore/projects_controller.rb3
-rw-r--r--app/finders/trending_projects_finder.rb16
-rw-r--r--app/models/project.rb16
-rw-r--r--app/models/trending_project.rb35
-rw-r--r--app/workers/trending_projects_worker.rb11
-rw-r--r--config/initializers/1_settings.rb4
-rw-r--r--db/migrate/20161007133303_precalculate_trending_projects.rb38
-rw-r--r--db/schema.rb9
-rw-r--r--spec/finders/trending_projects_finder_spec.rb48
-rw-r--r--spec/models/project_spec.rb26
-rw-r--r--spec/models/trending_project_spec.rb56
-rw-r--r--spec/workers/trending_projects_worker_spec.rb11
12 files changed, 171 insertions, 102 deletions
diff --git a/app/controllers/explore/projects_controller.rb b/app/controllers/explore/projects_controller.rb
index 38e5943eb76..a62c6211372 100644
--- a/app/controllers/explore/projects_controller.rb
+++ b/app/controllers/explore/projects_controller.rb
@@ -21,8 +21,7 @@ class Explore::ProjectsController < Explore::ApplicationController
end
def trending
- @projects = TrendingProjectsFinder.new.execute
- @projects = filter_projects(@projects)
+ @projects = filter_projects(Project.trending)
@projects = @projects.page(params[:page])
respond_to do |format|
diff --git a/app/finders/trending_projects_finder.rb b/app/finders/trending_projects_finder.rb
deleted file mode 100644
index c1e434d9926..00000000000
--- a/app/finders/trending_projects_finder.rb
+++ /dev/null
@@ -1,16 +0,0 @@
-# Finder for retrieving public trending projects in a given time range.
-class TrendingProjectsFinder
- # current_user - The currently logged in User, if any.
- # last_months - The number of months to limit the trending data to.
- def execute(months_limit = 1)
- Rails.cache.fetch(cache_key_for(months_limit), expires_in: 1.day) do
- Project.public_only.trending(months_limit.months.ago)
- end
- end
-
- private
-
- def cache_key_for(months)
- "trending_projects/#{months}"
- end
-end
diff --git a/app/models/project.rb b/app/models/project.rb
index 88e4bd14860..74d54e69648 100644
--- a/app/models/project.rb
+++ b/app/models/project.rb
@@ -375,19 +375,9 @@ class Project < ActiveRecord::Base
%r{(?<project>#{name_pattern}/#{name_pattern})}
end
- def trending(since = 1.month.ago)
- # By counting in the JOIN we don't expose the GROUP BY to the outer query.
- # This means that calls such as "any?" and "count" just return a number of
- # the total count, instead of the counts grouped per project as a Hash.
- join_body = "INNER JOIN (
- SELECT project_id, COUNT(*) AS amount
- FROM notes
- WHERE created_at >= #{sanitize(since)}
- AND system IS FALSE
- GROUP BY project_id
- ) join_note_counts ON projects.id = join_note_counts.project_id"
-
- joins(join_body).reorder('join_note_counts.amount DESC')
+ def trending
+ joins('INNER JOIN trending_projects ON projects.id = trending_projects.project_id').
+ reorder('trending_projects.id ASC')
end
def cached_count
diff --git a/app/models/trending_project.rb b/app/models/trending_project.rb
new file mode 100644
index 00000000000..27e3732da17
--- /dev/null
+++ b/app/models/trending_project.rb
@@ -0,0 +1,35 @@
+class TrendingProject < ActiveRecord::Base
+ belongs_to :project
+
+ # The number of months to include in the trending calculation.
+ MONTHS_TO_INCLUDE = 1
+
+ # The maximum number of projects to include in the trending set.
+ PROJECTS_LIMIT = 100
+
+ # Populates the trending projects table with the current list of trending
+ # projects.
+ def self.refresh!
+ # The calculation **must** run in a transaction. If the removal of data and
+ # insertion of new data were to run separately a user might end up with an
+ # empty list of trending projects for a short period of time.
+ transaction do
+ delete_all
+
+ timestamp = connection.quote(MONTHS_TO_INCLUDE.months.ago)
+
+ connection.execute <<-EOF.strip_heredoc
+ INSERT INTO #{table_name} (project_id)
+ SELECT project_id
+ FROM notes
+ INNER JOIN projects ON projects.id = notes.project_id
+ WHERE notes.created_at >= #{timestamp}
+ AND notes.system IS FALSE
+ AND projects.visibility_level = #{Gitlab::VisibilityLevel::PUBLIC}
+ GROUP BY project_id
+ ORDER BY count(*) DESC
+ LIMIT #{PROJECTS_LIMIT};
+ EOF
+ end
+ end
+end
diff --git a/app/workers/trending_projects_worker.rb b/app/workers/trending_projects_worker.rb
new file mode 100644
index 00000000000..df4c4a6628b
--- /dev/null
+++ b/app/workers/trending_projects_worker.rb
@@ -0,0 +1,11 @@
+class TrendingProjectsWorker
+ include Sidekiq::Worker
+
+ sidekiq_options queue: :trending_projects
+
+ def perform
+ Rails.logger.info('Refreshing trending projects')
+
+ TrendingProject.refresh!
+ end
+end
diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb
index c5ed2162c92..efe0ac9c965 100644
--- a/config/initializers/1_settings.rb
+++ b/config/initializers/1_settings.rb
@@ -304,6 +304,10 @@ Settings.cron_jobs['prune_old_events_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['prune_old_events_worker']['cron'] ||= '* */6 * * *'
Settings.cron_jobs['prune_old_events_worker']['job_class'] = 'PruneOldEventsWorker'
+Settings.cron_jobs['trending_projects_worker'] ||= Settingslogic.new({})
+Settings.cron_jobs['trending_projects_worker']['cron'] = '0 1 * * *'
+Settings.cron_jobs['trending_projects_worker']['job_class'] = 'TrendingProjectsWorker'
+
#
# GitLab Shell
#
diff --git a/db/migrate/20161007133303_precalculate_trending_projects.rb b/db/migrate/20161007133303_precalculate_trending_projects.rb
new file mode 100644
index 00000000000..b324cd94268
--- /dev/null
+++ b/db/migrate/20161007133303_precalculate_trending_projects.rb
@@ -0,0 +1,38 @@
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class PrecalculateTrendingProjects < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ def up
+ create_table :trending_projects do |t|
+ t.references :project, index: true, foreign_key: { on_delete: :cascade }, null: false
+ end
+
+ timestamp = connection.quote(1.month.ago)
+
+ # We're hardcoding the visibility level (public) here so that if it ever
+ # changes this query doesn't suddenly use the new value (which may break
+ # later migrations).
+ visibility = 20
+
+ execute <<-EOF.strip_heredoc
+ INSERT INTO trending_projects (project_id)
+ SELECT project_id
+ FROM notes
+ INNER JOIN projects ON projects.id = notes.project_id
+ WHERE notes.created_at >= #{timestamp}
+ AND notes.system IS FALSE
+ AND projects.visibility_level = #{visibility}
+ GROUP BY project_id
+ ORDER BY count(*) DESC
+ LIMIT 100;
+ EOF
+ end
+
+ def down
+ drop_table :trending_projects
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 56da70b3c02..c5ddf9eee32 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 20160926145521) do
+ActiveRecord::Schema.define(version: 20161007133303) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@@ -1070,6 +1070,12 @@ ActiveRecord::Schema.define(version: 20160926145521) do
add_index "todos", ["target_type", "target_id"], name: "index_todos_on_target_type_and_target_id", using: :btree
add_index "todos", ["user_id"], name: "index_todos_on_user_id", using: :btree
+ create_table "trending_projects", force: :cascade do |t|
+ t.integer "project_id", null: false
+ end
+
+ add_index "trending_projects", ["project_id"], name: "index_trending_projects_on_project_id", using: :btree
+
create_table "u2f_registrations", force: :cascade do |t|
t.text "certificate"
t.string "key_handle"
@@ -1212,5 +1218,6 @@ ActiveRecord::Schema.define(version: 20160926145521) do
add_foreign_key "personal_access_tokens", "users"
add_foreign_key "protected_branch_merge_access_levels", "protected_branches"
add_foreign_key "protected_branch_push_access_levels", "protected_branches"
+ add_foreign_key "trending_projects", "projects", on_delete: :cascade
add_foreign_key "u2f_registrations", "users"
end
diff --git a/spec/finders/trending_projects_finder_spec.rb b/spec/finders/trending_projects_finder_spec.rb
deleted file mode 100644
index cfe15b9defa..00000000000
--- a/spec/finders/trending_projects_finder_spec.rb
+++ /dev/null
@@ -1,48 +0,0 @@
-require 'spec_helper'
-
-describe TrendingProjectsFinder do
- let(:user) { create(:user) }
- let(:public_project1) { create(:empty_project, :public) }
- let(:public_project2) { create(:empty_project, :public) }
- let(:private_project) { create(:empty_project, :private) }
- let(:internal_project) { create(:empty_project, :internal) }
-
- before do
- 3.times do
- create(:note_on_commit, project: public_project1)
- end
-
- 2.times do
- create(:note_on_commit, project: public_project2, created_at: 5.weeks.ago)
- end
-
- create(:note_on_commit, project: private_project)
- create(:note_on_commit, project: internal_project)
- end
-
- describe '#execute', caching: true do
- context 'without an explicit time range' do
- it 'returns public trending projects' do
- projects = described_class.new.execute
-
- expect(projects).to eq([public_project1])
- end
- end
-
- context 'with an explicit time range' do
- it 'returns public trending projects' do
- projects = described_class.new.execute(2)
-
- expect(projects).to eq([public_project1, public_project2])
- end
- end
-
- it 'caches the list of projects' do
- projects = described_class.new
-
- expect(Project).to receive(:trending).once
-
- 2.times { projects.execute }
- end
- end
-end
diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb
index 8aadfcb439b..dae546a0cdc 100644
--- a/spec/models/project_spec.rb
+++ b/spec/models/project_spec.rb
@@ -800,32 +800,14 @@ describe Project, models: true do
end
create(:note_on_commit, project: project2)
- end
-
- describe 'without an explicit start date' do
- subject { described_class.trending.to_a }
- it 'sorts Projects by the amount of notes in descending order' do
- expect(subject).to eq([project1, project2])
- end
+ TrendingProject.refresh!
end
- describe 'with an explicit start date' do
- let(:date) { 2.months.ago }
+ subject { described_class.trending.to_a }
- subject { described_class.trending(date).to_a }
-
- before do
- 2.times do
- # Little fix for special issue related to Fractional Seconds support for MySQL.
- # See: https://github.com/rails/rails/pull/14359/files
- create(:note_on_commit, project: project2, created_at: date + 1)
- end
- end
-
- it 'sorts Projects by the amount of notes in descending order' do
- expect(subject).to eq([project2, project1])
- end
+ it 'sorts projects by the amount of notes in descending order' do
+ expect(subject).to eq([project1, project2])
end
it 'does not take system notes into account' do
diff --git a/spec/models/trending_project_spec.rb b/spec/models/trending_project_spec.rb
new file mode 100644
index 00000000000..cc28c6d4004
--- /dev/null
+++ b/spec/models/trending_project_spec.rb
@@ -0,0 +1,56 @@
+require 'spec_helper'
+
+describe TrendingProject do
+ let(:user) { create(:user) }
+ let(:public_project1) { create(:empty_project, :public) }
+ let(:public_project2) { create(:empty_project, :public) }
+ let(:public_project3) { create(:empty_project, :public) }
+ let(:private_project) { create(:empty_project, :private) }
+ let(:internal_project) { create(:empty_project, :internal) }
+
+ before do
+ 3.times do
+ create(:note_on_commit, project: public_project1)
+ end
+
+ 2.times do
+ create(:note_on_commit, project: public_project2)
+ end
+
+ create(:note_on_commit, project: public_project3, created_at: 5.weeks.ago)
+ create(:note_on_commit, project: private_project)
+ create(:note_on_commit, project: internal_project)
+ end
+
+ describe '.refresh!' do
+ before do
+ described_class.refresh!
+ end
+
+ it 'populates the trending projects table' do
+ expect(described_class.count).to eq(2)
+ end
+
+ it 'removes existing rows before populating the table' do
+ described_class.refresh!
+
+ expect(described_class.count).to eq(2)
+ end
+
+ it 'stores the project IDs for every trending project' do
+ rows = described_class.order(id: :asc).all
+
+ expect(rows[0].project_id).to eq(public_project1.id)
+ expect(rows[1].project_id).to eq(public_project2.id)
+ end
+
+ it 'does not store projects that fall out of the trending time range' do
+ expect(described_class.where(project_id: public_project3).any?).to eq(false)
+ end
+
+ it 'stores only public projects' do
+ expect(described_class.where(project_id: [public_project1.id, public_project2.id]).count).to eq(2)
+ expect(described_class.where(project_id: [private_project.id, internal_project.id]).count).to eq(0)
+ end
+ end
+end
diff --git a/spec/workers/trending_projects_worker_spec.rb b/spec/workers/trending_projects_worker_spec.rb
new file mode 100644
index 00000000000..c3c6fdcf2d5
--- /dev/null
+++ b/spec/workers/trending_projects_worker_spec.rb
@@ -0,0 +1,11 @@
+require 'spec_helper'
+
+describe TrendingProjectsWorker do
+ describe '#perform' do
+ it 'refreshes the trending projects' do
+ expect(TrendingProject).to receive(:refresh!)
+
+ described_class.new.perform
+ end
+ end
+end