summary | refs | log | tree | commit | diff
path: root/lib/gitlab/background_migration
diff options
context:
space:
mode:
author: Michael Kozono <mkozono@gmail.com> 2017-11-20 16:27:24 -0800
committer: Michael Kozono <mkozono@gmail.com> 2017-12-01 15:26:41 -0800
commit: edb5cac46c1cba1029fb3e67d4853027590584f6 (patch)
tree: 3f17ff04ba05aea3143a69abd78124417437b024 /lib/gitlab/background_migration
parent: 17ce21d74eab4d2973d372cb3f97258eb3b81de9 (diff)
download: gitlab-ce-edb5cac46c1cba1029fb3e67d4853027590584f6.tar.gz
Use bulk inserts
Diffstat (limited to 'lib/gitlab/background_migration')
-rw-r--r-- lib/gitlab/background_migration/prepare_untracked_uploads.rb 55
1 files changed, 35 insertions, 20 deletions
diff --git a/lib/gitlab/background_migration/prepare_untracked_uploads.rb b/lib/gitlab/background_migration/prepare_untracked_uploads.rb
index c3f5dddb07d..022b2f41393 100644
--- a/lib/gitlab/background_migration/prepare_untracked_uploads.rb
+++ b/lib/gitlab/background_migration/prepare_untracked_uploads.rb
@@ -20,7 +20,19 @@ module Gitlab
def perform
ensure_temporary_tracking_table_exists
+
+ # Since Postgres < 9.5 does not have ON CONFLICT DO NOTHING, and since
+ # doing inserts-if-not-exists without ON CONFLICT DO NOTHING would be
+ # slow, start with an empty table for Postgres < 9.5.
+ # That way we can do bulk inserts at ~30x the speed of individual
+ # inserts (~20 minutes worth of inserts at GitLab.com scale instead of
+ # ~10 hours).
+ # In all other cases, installations will get both bulk inserts and the
+ # ability for these jobs to retry without having to clear and reinsert.
+ clear_untracked_file_paths unless can_bulk_insert_and_ignore_duplicates?
+
store_untracked_file_paths
+
schedule_populate_untracked_uploads_jobs
end
@@ -44,6 +56,10 @@ module Gitlab
end
end
+ def clear_untracked_file_paths
+ UntrackedFile.delete_all
+ end
+
def store_untracked_file_paths
return unless Dir.exist?(ABSOLUTE_UPLOAD_DIR)
@@ -96,36 +112,35 @@ module Gitlab
end
def insert_file_paths(file_paths)
- ActiveRecord::Base.transaction do
- file_paths.each do |file_path|
- insert_file_path(file_path)
- end
- end
- end
+ sql = if postgresql_pre_9_5?
+ "INSERT INTO #{table_columns_and_values_for_insert(file_paths)};"
+ elsif postgresql?
+ "INSERT INTO #{table_columns_and_values_for_insert(file_paths)} ON CONFLICT DO NOTHING;"
+ else # MySQL
+ "INSERT IGNORE INTO #{table_columns_and_values_for_insert(file_paths)};"
+ end
- def insert_file_path(file_path)
- if postgresql_pre_9_5?
- # No easy way to do ON CONFLICT DO NOTHING before Postgres 9.5 so just use Rails
- return UntrackedFile.where(path: file_path).first_or_create
- end
+ ActiveRecord::Base.connection.execute(sql)
+ end
- table_columns_and_values = 'untracked_files_for_uploads (path, created_at, updated_at) VALUES (?, ?, ?)'
+ def table_columns_and_values_for_insert(file_paths)
+ timestamp = Time.now.utc.iso8601
- sql = if postgresql?
- "INSERT INTO #{table_columns_and_values} ON CONFLICT DO NOTHING;"
- else
- "INSERT IGNORE INTO #{table_columns_and_values};"
- end
+ values = file_paths.map do |file_path|
+ ActiveRecord::Base.send(:sanitize_sql_array, ['(?, ?, ?)', file_path, timestamp, timestamp]) # rubocop:disable GitlabSecurity/PublicSend
+ end.join(', ')
- timestamp = Time.now.utc.iso8601
- sql = ActiveRecord::Base.send(:sanitize_sql_array, [sql, file_path, timestamp, timestamp]) # rubocop:disable GitlabSecurity/PublicSend
- ActiveRecord::Base.connection.execute(sql)
+ "#{UntrackedFile.table_name} (path, created_at, updated_at) VALUES #{values}"
end
def postgresql?
@postgresql ||= Gitlab::Database.postgresql?
end
+ def can_bulk_insert_and_ignore_duplicates?
+ !postgresql_pre_9_5?
+ end
+
def postgresql_pre_9_5?
@postgresql_pre_9_5 ||= postgresql? &&
ActiveRecord::Base.connection.select_value('SHOW server_version_num').to_i < 90500