summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorick Peterse <yorickpeterse@gmail.com>2017-07-27 19:42:15 +0200
committerYorick Peterse <yorickpeterse@gmail.com>2017-08-10 17:45:49 +0200
commitaac1de46c9be659b74da12f704412f38292974db (patch)
tree69882b53b3fff8adfe6a049a61708c4c4c39a674
parent0395c47193b3bbf6b4f060f28c9f632580313a35 (diff)
downloadgitlab-ce-aac1de46c9be659b74da12f704412f38292974db.tar.gz
Use a specialized class for querying events
This changes various controllers to use the new EventCollection class for retrieving events. This class uses a JOIN LATERAL query on PostgreSQL to retrieve queries in a more efficient way, while falling back to a simpler / less efficient query for MySQL. The EventCollection class also includes a limit on the number of events to display to prevent malicious users from cycling through all events, as doing so could put a lot of pressure on the database. JOIN LATERAL is only supported on PostgreSQL starting with version 9.3.0 and as such this optimisation is only used when using PostgreSQL 9.3 or newer.
-rw-r--r--app/controllers/dashboard/projects_controller.rb8
-rw-r--r--app/controllers/dashboard_controller.rb6
-rw-r--r--app/controllers/groups_controller.rb6
-rw-r--r--app/controllers/projects_controller.rb9
-rw-r--r--app/models/event.rb9
-rw-r--r--app/models/event_collection.rb98
-rw-r--r--changelogs/unreleased/use-a-specialized-class-for-querying-events.yml4
-rw-r--r--db/migrate/20170727123534_add_index_on_events_project_id_id.rb37
-rw-r--r--db/schema.rb4
-rw-r--r--lib/gitlab/database.rb4
-rw-r--r--spec/lib/gitlab/database_spec.rb22
-rw-r--r--spec/models/event_collection_spec.rb51
12 files changed, 241 insertions, 17 deletions
diff --git a/app/controllers/dashboard/projects_controller.rb b/app/controllers/dashboard/projects_controller.rb
index 74fe45e1ff6..f71ab702e71 100644
--- a/app/controllers/dashboard/projects_controller.rb
+++ b/app/controllers/dashboard/projects_controller.rb
@@ -52,8 +52,10 @@ class Dashboard::ProjectsController < Dashboard::ApplicationController
end
def load_events
- @events = Event.in_projects(load_projects(params.merge(non_public: true)))
- @events = event_filter.apply_filter(@events).with_associations
- @events = @events.limit(20).offset(params[:offset] || 0)
+ projects = load_projects(params.merge(non_public: true))
+
+ @events = EventCollection
+ .new(projects, offset: params[:offset].to_i, filter: event_filter)
+ .to_a
end
end
diff --git a/app/controllers/dashboard_controller.rb b/app/controllers/dashboard_controller.rb
index f9c31920302..19a5db6fd17 100644
--- a/app/controllers/dashboard_controller.rb
+++ b/app/controllers/dashboard_controller.rb
@@ -29,9 +29,9 @@ class DashboardController < Dashboard::ApplicationController
current_user.authorized_projects
end
- @events = Event.in_projects(projects)
- @events = @event_filter.apply_filter(@events).with_associations
- @events = @events.limit(20).offset(params[:offset] || 0)
+ @events = EventCollection
+ .new(projects, offset: params[:offset].to_i, filter: @event_filter)
+ .to_a
end
def set_show_full_reference
diff --git a/app/controllers/groups_controller.rb b/app/controllers/groups_controller.rb
index 27137ffde54..f76b3f69e9e 100644
--- a/app/controllers/groups_controller.rb
+++ b/app/controllers/groups_controller.rb
@@ -160,9 +160,9 @@ class GroupsController < Groups::ApplicationController
end
def load_events
- @events = Event.in_projects(@projects)
- @events = event_filter.apply_filter(@events).with_associations
- @events = @events.limit(20).offset(params[:offset] || 0)
+ @events = EventCollection
+ .new(@projects, offset: params[:offset].to_i, filter: event_filter)
+ .to_a
end
def user_actions
diff --git a/app/controllers/projects_controller.rb b/app/controllers/projects_controller.rb
index 8dfe0f51709..e93f34498d6 100644
--- a/app/controllers/projects_controller.rb
+++ b/app/controllers/projects_controller.rb
@@ -301,10 +301,11 @@ class ProjectsController < Projects::ApplicationController
end
def load_events
- @events = @project.events.recent
- @events = event_filter.apply_filter(@events).with_associations
- limit = (params[:limit] || 20).to_i
- @events = @events.limit(limit).offset(params[:offset] || 0)
+ projects = Project.where(id: @project.id)
+
+ @events = EventCollection
+ .new(projects, offset: params[:offset].to_i, filter: event_filter)
+ .to_a
end
def project_params
diff --git a/app/models/event.rb b/app/models/event.rb
index a598eb08e82..f2a560a6b56 100644
--- a/app/models/event.rb
+++ b/app/models/event.rb
@@ -62,8 +62,13 @@ class Event < ActiveRecord::Base
scope :recent, -> { reorder(id: :desc) }
scope :code_push, -> { where(action: PUSHED) }
- scope :in_projects, ->(projects) do
- where(project_id: projects.pluck(:id)).recent
+ scope :in_projects, -> (projects) do
+ sub_query = projects
+ .except(:order)
+ .select(1)
+ .where('projects.id = events.project_id')
+
+ where('EXISTS (?)', sub_query).recent
end
scope :with_associations, -> do
diff --git a/app/models/event_collection.rb b/app/models/event_collection.rb
new file mode 100644
index 00000000000..8b8244314af
--- /dev/null
+++ b/app/models/event_collection.rb
@@ -0,0 +1,98 @@
+# A collection of events to display in an event list.
+#
+# An EventCollection is meant to be used for displaying events to a user (e.g.
+# in a controller), it's not suitable for building queries that are used for
+# building other queries.
+class EventCollection
+ # To prevent users from putting too much pressure on the database by cycling
+ # through thousands of events we put a limit on the number of pages.
+ MAX_PAGE = 10
+
+ # projects - An ActiveRecord::Relation object that returns the projects for
+ # which to retrieve events.
+ # filter - An EventFilter instance to use for filtering events.
+ def initialize(projects, limit: 20, offset: 0, filter: nil)
+ @projects = projects
+ @limit = limit
+ @offset = offset
+ @filter = filter
+ end
+
+ # Returns an Array containing the events.
+ def to_a
+ return [] if current_page > MAX_PAGE
+
+ relation = if Gitlab::Database.join_lateral_supported?
+ relation_with_join_lateral
+ else
+ relation_without_join_lateral
+ end
+
+ relation.with_associations.to_a
+ end
+
+ private
+
+ # Returns the events relation to use when JOIN LATERAL is not supported.
+ #
+ # This relation simply gets all the events for all authorized projects, then
+ # limits that set.
+ def relation_without_join_lateral
+ events = filtered_events.in_projects(projects)
+
+ paginate_events(events)
+ end
+
+ # Returns the events relation to use when JOIN LATERAL is supported.
+ #
+ # This relation is built using JOIN LATERAL, producing faster queries than a
+ # regular LIMIT + OFFSET approach.
+ def relation_with_join_lateral
+ projects_for_lateral = projects.select(:id).to_sql
+
+ lateral = filtered_events
+ .limit(limit_for_join_lateral)
+ .where('events.project_id = projects_for_lateral.id')
+ .to_sql
+
+ # The outer query does not need to re-apply the filters since the JOIN
+ # LATERAL body already takes care of this.
+ outer = base_relation
+ .from("(#{projects_for_lateral}) projects_for_lateral")
+ .joins("JOIN LATERAL (#{lateral}) AS #{Event.table_name} ON true")
+
+ paginate_events(outer)
+ end
+
+ def filtered_events
+ @filter ? @filter.apply_filter(base_relation) : base_relation
+ end
+
+ def paginate_events(events)
+ events.limit(@limit).offset(@offset)
+ end
+
+ def base_relation
+ # We want to have absolute control over the event queries being built, thus
+ # we're explicitly opting out of any default scopes that may be set.
+ Event.unscoped.recent
+ end
+
+ def limit_for_join_lateral
+ # Applying the OFFSET on the inside of a JOIN LATERAL leads to incorrect
+ # results. To work around this we need to increase the inner limit for every
+ # page.
+ #
+ # This means that on page 1 we use LIMIT 20, and an outer OFFSET of 0. On
+ # page 2 we use LIMIT 40 and an outer OFFSET of 20.
+ @limit + @offset
+ end
+
+ def current_page
+ (@offset / @limit) + 1
+ end
+
+ def projects
+ @projects.except(:order)
+ end
+end
diff --git a/changelogs/unreleased/use-a-specialized-class-for-querying-events.yml b/changelogs/unreleased/use-a-specialized-class-for-querying-events.yml
new file mode 100644
index 00000000000..6c1ec10aa12
--- /dev/null
+++ b/changelogs/unreleased/use-a-specialized-class-for-querying-events.yml
@@ -0,0 +1,4 @@
+---
+title: Use a specialized class for querying events to improve performance
+merge_request:
+author:
diff --git a/db/migrate/20170727123534_add_index_on_events_project_id_id.rb b/db/migrate/20170727123534_add_index_on_events_project_id_id.rb
new file mode 100644
index 00000000000..1c4aaaf9dd6
--- /dev/null
+++ b/db/migrate/20170727123534_add_index_on_events_project_id_id.rb
@@ -0,0 +1,37 @@
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class AddIndexOnEventsProjectIdId < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ COLUMNS = %i[project_id id].freeze
+ TABLES = %i[events events_for_migration].freeze
+
+ disable_ddl_transaction!
+
+ def up
+ TABLES.each do |table|
+ add_concurrent_index(table, COLUMNS) unless index_exists?(table, COLUMNS)
+
+ # We remove the index _after_ adding the new one since MySQL doesn't let
+ # you remove an index when a foreign key exists for the same column.
+ if index_exists?(table, :project_id)
+ remove_concurrent_index(table, :project_id)
+ end
+ end
+ end
+
+ def down
+ TABLES.each do |table|
+ unless index_exists?(table, :project_id)
+ add_concurrent_index(table, :project_id)
+ end
+
+ unless index_exists?(table, COLUMNS)
+ remove_concurrent_index(table, COLUMNS)
+ end
+ end
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 8a2df991f0d..d8e8ef41758 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -530,7 +530,7 @@ ActiveRecord::Schema.define(version: 20170807160457) do
add_index "events", ["action"], name: "index_events_on_action", using: :btree
add_index "events", ["author_id"], name: "index_events_on_author_id", using: :btree
add_index "events", ["created_at"], name: "index_events_on_created_at", using: :btree
- add_index "events", ["project_id"], name: "index_events_on_project_id", using: :btree
+ add_index "events", ["project_id", "id"], name: "index_events_on_project_id_and_id", using: :btree
add_index "events", ["target_id"], name: "index_events_on_target_id", using: :btree
add_index "events", ["target_type"], name: "index_events_on_target_type", using: :btree
@@ -546,7 +546,7 @@ ActiveRecord::Schema.define(version: 20170807160457) do
add_index "events_for_migration", ["action"], name: "index_events_for_migration_on_action", using: :btree
add_index "events_for_migration", ["author_id"], name: "index_events_for_migration_on_author_id", using: :btree
- add_index "events_for_migration", ["project_id"], name: "index_events_for_migration_on_project_id", using: :btree
+ add_index "events_for_migration", ["project_id", "id"], name: "index_events_for_migration_on_project_id_and_id", using: :btree
add_index "events_for_migration", ["target_type", "target_id"], name: "index_events_for_migration_on_target_type_and_target_id", using: :btree
create_table "feature_gates", force: :cascade do |t|
diff --git a/lib/gitlab/database.rb b/lib/gitlab/database.rb
index d7dab584a44..e001d25e7b7 100644
--- a/lib/gitlab/database.rb
+++ b/lib/gitlab/database.rb
@@ -25,6 +25,10 @@ module Gitlab
database_version.match(/\A(?:PostgreSQL |)([^\s]+).*\z/)[1]
end
+ def self.join_lateral_supported?
+ postgresql? && version.to_f >= 9.3
+ end
+
def self.nulls_last_order(field, direction = 'ASC')
order = "#{field} #{direction}"
diff --git a/spec/lib/gitlab/database_spec.rb b/spec/lib/gitlab/database_spec.rb
index c5f9aecd867..5fa94999d25 100644
--- a/spec/lib/gitlab/database_spec.rb
+++ b/spec/lib/gitlab/database_spec.rb
@@ -51,6 +51,28 @@ describe Gitlab::Database do
end
end
+ describe '.join_lateral_supported?' do
+ it 'returns false when using MySQL' do
+ allow(described_class).to receive(:postgresql?).and_return(false)
+
+ expect(described_class.join_lateral_supported?).to eq(false)
+ end
+
+ it 'returns false when using PostgreSQL 9.2' do
+ allow(described_class).to receive(:postgresql?).and_return(true)
+ allow(described_class).to receive(:version).and_return('9.2.1')
+
+ expect(described_class.join_lateral_supported?).to eq(false)
+ end
+
+ it 'returns true when using PostgreSQL 9.3.0 or newer' do
+ allow(described_class).to receive(:postgresql?).and_return(true)
+ allow(described_class).to receive(:version).and_return('9.3.0')
+
+ expect(described_class.join_lateral_supported?).to eq(true)
+ end
+ end
+
describe '.nulls_last_order' do
context 'when using PostgreSQL' do
before do
diff --git a/spec/models/event_collection_spec.rb b/spec/models/event_collection_spec.rb
new file mode 100644
index 00000000000..e0a87c18cc7
--- /dev/null
+++ b/spec/models/event_collection_spec.rb
@@ -0,0 +1,51 @@
+require 'spec_helper'
+
+describe EventCollection do
+ describe '#to_a' do
+ let(:project) { create(:project_empty_repo) }
+ let(:projects) { Project.where(id: project.id) }
+ let(:user) { create(:user) }
+
+ before do
+ 20.times do
+ event = create(:push_event, project: project, author: user)
+
+ create(:push_event_payload, event: event)
+ end
+
+ create(:closed_issue_event, project: project, author: user)
+ end
+
+ it 'returns an Array of events' do
+ events = described_class.new(projects).to_a
+
+ expect(events).to be_an_instance_of(Array)
+ end
+
+ it 'applies a limit to the number of events' do
+ events = described_class.new(projects).to_a
+
+ expect(events.length).to eq(20)
+ end
+
+ it 'can paginate through events' do
+ events = described_class.new(projects, offset: 20).to_a
+
+ expect(events.length).to eq(1)
+ end
+
+ it 'returns an empty Array when crossing the maximum page number' do
+ events = described_class.new(projects, limit: 1, offset: 15).to_a
+
+ expect(events).to be_empty
+ end
+
+ it 'allows filtering of events using an EventFilter' do
+ filter = EventFilter.new(EventFilter.issue)
+ events = described_class.new(projects, filter: filter).to_a
+
+ expect(events.length).to eq(1)
+ expect(events[0].action).to eq(Event::CLOSED)
+ end
+ end
+end