diff options
author | Michael Kozono <mkozono@gmail.com> | 2017-11-20 16:27:24 -0800 |
---|---|---|
committer | Michael Kozono <mkozono@gmail.com> | 2017-12-01 15:26:41 -0800 |
commit | edb5cac46c1cba1029fb3e67d4853027590584f6 (patch) | |
tree | 3f17ff04ba05aea3143a69abd78124417437b024 /lib/gitlab/background_migration | |
parent | 17ce21d74eab4d2973d372cb3f97258eb3b81de9 (diff) | |
download | gitlab-ce-edb5cac46c1cba1029fb3e67d4853027590584f6.tar.gz |
Use bulk inserts
Diffstat (limited to 'lib/gitlab/background_migration')
-rw-r--r-- | lib/gitlab/background_migration/prepare_untracked_uploads.rb | 55 |
1 files changed, 35 insertions, 20 deletions
diff --git a/lib/gitlab/background_migration/prepare_untracked_uploads.rb b/lib/gitlab/background_migration/prepare_untracked_uploads.rb index c3f5dddb07d..022b2f41393 100644 --- a/lib/gitlab/background_migration/prepare_untracked_uploads.rb +++ b/lib/gitlab/background_migration/prepare_untracked_uploads.rb @@ -20,7 +20,19 @@ module Gitlab def perform ensure_temporary_tracking_table_exists + + # Since Postgres < 9.5 does not have ON CONFLICT DO NOTHING, and since + # doing inserts-if-not-exists without ON CONFLICT DO NOTHING would be + # slow, start with an empty table for Postgres < 9.5. + # That way we can do bulk inserts at ~30x the speed of individual + # inserts (~20 minutes worth of inserts at GitLab.com scale instead of + # ~10 hours). + # In all other cases, installations will get both bulk inserts and the + # ability for these jobs to retry without having to clear and reinsert. + clear_untracked_file_paths unless can_bulk_insert_and_ignore_duplicates? + store_untracked_file_paths + schedule_populate_untracked_uploads_jobs end @@ -44,6 +56,10 @@ module Gitlab end end + def clear_untracked_file_paths + UntrackedFile.delete_all + end + def store_untracked_file_paths return unless Dir.exist?(ABSOLUTE_UPLOAD_DIR) @@ -96,36 +112,35 @@ module Gitlab end def insert_file_paths(file_paths) - ActiveRecord::Base.transaction do - file_paths.each do |file_path| - insert_file_path(file_path) - end - end - end + sql = if postgresql_pre_9_5? + "INSERT INTO #{table_columns_and_values_for_insert(file_paths)};" + elsif postgresql? + "INSERT INTO #{table_columns_and_values_for_insert(file_paths)} ON CONFLICT DO NOTHING;" + else # MySQL + "INSERT IGNORE INTO #{table_columns_and_values_for_insert(file_paths)};" + end - def insert_file_path(file_path) - if postgresql_pre_9_5? - # No easy way to do ON CONFLICT DO NOTHING before Postgres 9.5 so just use Rails - return UntrackedFile.where(path: file_path).first_or_create - end + ActiveRecord::Base.connection.execute(sql) + end - table_columns_and_values = 'untracked_files_for_uploads (path, created_at, updated_at) VALUES (?, ?, ?)' + def table_columns_and_values_for_insert(file_paths) + timestamp = Time.now.utc.iso8601 - sql = if postgresql? - "INSERT INTO #{table_columns_and_values} ON CONFLICT DO NOTHING;" - else - "INSERT IGNORE INTO #{table_columns_and_values};" - end + values = file_paths.map do |file_path| + ActiveRecord::Base.send(:sanitize_sql_array, ['(?, ?, ?)', file_path, timestamp, timestamp]) # rubocop:disable GitlabSecurity/PublicSend + end.join(', ') - timestamp = Time.now.utc.iso8601 - sql = ActiveRecord::Base.send(:sanitize_sql_array, [sql, file_path, timestamp, timestamp]) # rubocop:disable GitlabSecurity/PublicSend - ActiveRecord::Base.connection.execute(sql) + "#{UntrackedFile.table_name} (path, created_at, updated_at) VALUES #{values}" end def postgresql? @postgresql ||= Gitlab::Database.postgresql? end + def can_bulk_insert_and_ignore_duplicates? + !postgresql_pre_9_5? + end + def postgresql_pre_9_5? @postgresql_pre_9_5 ||= postgresql? && ActiveRecord::Base.connection.select_value('SHOW server_version_num').to_i < 90500 |