From 74b3870a958cadf35fd3c13a78334c96d46de939 Mon Sep 17 00:00:00 2001 From: Michael Kozono Date: Sun, 26 Nov 2017 22:57:21 -0800 Subject: Address Rubocop offenses --- .../populate_untracked_uploads.rb | 64 +++++++++++++++------- .../prepare_untracked_uploads.rb | 57 +++++++++++++------ 2 files changed, 82 insertions(+), 39 deletions(-) diff --git a/lib/gitlab/background_migration/populate_untracked_uploads.rb b/lib/gitlab/background_migration/populate_untracked_uploads.rb index ea04484c6a1..802b661886b 100644 --- a/lib/gitlab/background_migration/populate_untracked_uploads.rb +++ b/lib/gitlab/background_migration/populate_untracked_uploads.rb @@ -1,12 +1,18 @@ +# frozen_string_literal: true + module Gitlab module BackgroundMigration - class PopulateUntrackedUploads - class UntrackedFile < ActiveRecord::Base + # This class processes a batch of rows in `untracked_files_for_uploads` by + # adding each file to the `uploads` table if it does not exist. + class PopulateUntrackedUploads # rubocop:disable Metrics/ClassLength + # This class is responsible for producing the attributes necessary to + # track an uploaded file in the `uploads` table. + class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength self.table_name = 'untracked_files_for_uploads' # Ends with /:random_hex/:filename - FILE_UPLOADER_PATH_PATTERN = %r{/\h+/[^/]+\z} - FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN = %r{\A(.+)#{FILE_UPLOADER_PATH_PATTERN}} + FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z} + FULL_PATH_CAPTURE = %r{\A(.+)#{FILE_UPLOADER_PATH}} # These regex patterns are tested against a relative path, relative to # the upload directory. @@ -44,7 +50,7 @@ module Gitlab model_type: 'Project' }, { - pattern: FILE_UPLOADER_PATH_PATTERN, + pattern: FILE_UPLOADER_PATH, uploader: 'FileUploader', model_type: 'Project' } @@ -63,13 +69,14 @@ module Gitlab def upload_path # UntrackedFile#path is absolute, but Upload#path depends on uploader - @upload_path ||= if uploader == 'FileUploader' - # Path relative to project directory in uploads - matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN) - matchd[0].sub(%r{\A/}, '') # remove leading slash - else - path - end + @upload_path ||= + if uploader == 'FileUploader' + # Path relative to project directory in uploads + matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH) + matchd[0].sub(%r{\A/}, '') # remove leading slash + else + path + end end def uploader @@ -83,7 +90,8 @@ module Gitlab def model_id return @model_id if defined?(@model_id) - matchd = path_relative_to_upload_dir.match(matching_pattern_map[:pattern]) + pattern = matching_pattern_map[:pattern] + matchd = path_relative_to_upload_dir.match(pattern) # If something is captured (matchd[1] is not nil), it is a model_id # Only the FileUploader pattern will not match an ID @@ -105,14 +113,20 @@ module Gitlab path_relative_to_upload_dir.match(path_pattern_map[:pattern]) end - raise "Unknown upload path pattern \"#{path}\"" unless @matching_pattern_map + unless @matching_pattern_map + raise "Unknown upload path pattern \"#{path}\"" + end @matching_pattern_map end def file_uploader_model_id - matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN) - raise "Could not capture project full_path from a FileUploader path: \"#{path_relative_to_upload_dir}\"" unless matchd + matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE) + not_found_msg = <<~MSG + Could not capture project full_path from a FileUploader path: + "#{path_relative_to_upload_dir}" + MSG + raise not_found_msg unless matchd full_path = matchd[1] project = Project.find_by_full_path(full_path) @@ -123,7 +137,8 @@ module Gitlab # Not including a leading slash def path_relative_to_upload_dir - base = %r{\A#{Regexp.escape(Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR)}/} + upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength + base = %r{\A#{Regexp.escape(upload_dir)}/} @path_relative_to_upload_dir ||= path.sub(base, '') end @@ -132,6 +147,7 @@ module Gitlab end end + # This class is used to query the `uploads` table. class Upload < ActiveRecord::Base self.table_name = 'uploads' end @@ -192,8 +208,10 @@ module Gitlab end ids.each do |model_type, model_ids| - found_ids = Object.const_get(model_type).where(id: model_ids.uniq).pluck(:id) - ids[model_type] = ids[model_type] - found_ids # replace with deleted ids + model_class = Object.const_get(model_type) + found_ids = model_class.where(id: model_ids.uniq).pluck(:id) + deleted_ids = ids[model_type] - found_ids + ids[model_type] = deleted_ids end ids @@ -204,11 +222,15 @@ module Gitlab file.to_h.merge(created_at: 'NOW()') end - Gitlab::Database.bulk_insert('uploads', rows, disable_quote: :created_at) + Gitlab::Database.bulk_insert('uploads', + rows, + disable_quote: :created_at) end def drop_temp_table_if_finished - UntrackedFile.connection.drop_table(:untracked_files_for_uploads) if UntrackedFile.all.empty? + if UntrackedFile.all.empty? + UntrackedFile.connection.drop_table(:untracked_files_for_uploads) + end end end end diff --git a/lib/gitlab/background_migration/prepare_untracked_uploads.rb b/lib/gitlab/background_migration/prepare_untracked_uploads.rb index 358b76d39fb..d8cddd98cbb 100644 --- a/lib/gitlab/background_migration/prepare_untracked_uploads.rb +++ b/lib/gitlab/background_migration/prepare_untracked_uploads.rb @@ -1,10 +1,14 @@ +# frozen_string_literal: true + module Gitlab module BackgroundMigration - class PrepareUntrackedUploads + # This class finds all non-hashed uploaded file paths and saves them to a + # `untracked_files_for_uploads` table. + class PrepareUntrackedUploads # rubocop:disable Metrics/ClassLength # For bulk_queue_background_migration_jobs_by_range include Database::MigrationHelpers - FILE_PATH_BATCH_SIZE = 500 + FIND_BATCH_SIZE = 500 RELATIVE_UPLOAD_DIR = "uploads".freeze ABSOLUTE_UPLOAD_DIR = "#{CarrierWave.root}/#{RELATIVE_UPLOAD_DIR}".freeze FOLLOW_UP_MIGRATION = 'PopulateUntrackedUploads'.freeze @@ -12,6 +16,8 @@ module Gitlab EXCLUDED_HASHED_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/@hashed/*".freeze EXCLUDED_TMP_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/tmp/*".freeze + # This class is used to iterate over batches of + # `untracked_files_for_uploads` rows. class UntrackedFile < ActiveRecord::Base include EachBatch @@ -39,8 +45,9 @@ module Gitlab private def ensure_temporary_tracking_table_exists - unless UntrackedFile.connection.table_exists?(:untracked_files_for_uploads) - UntrackedFile.connection.create_table :untracked_files_for_uploads do |t| + table_name = :untracked_files_for_uploads + unless UntrackedFile.connection.table_exists?(table_name) + UntrackedFile.connection.create_table table_name do |t| t.string :path, limit: 600, null: false t.index :path, unique: true end @@ -54,7 +61,7 @@ module Gitlab def store_untracked_file_paths return unless Dir.exist?(ABSOLUTE_UPLOAD_DIR) - each_file_batch(ABSOLUTE_UPLOAD_DIR, FILE_PATH_BATCH_SIZE) do |file_paths| + each_file_batch(ABSOLUTE_UPLOAD_DIR, FIND_BATCH_SIZE) do |file_paths| insert_file_paths(file_paths) end end @@ -85,12 +92,17 @@ module Gitlab end def build_find_command(search_dir) - cmd = %W[find #{search_dir} -type f ! ( -path #{EXCLUDED_HASHED_UPLOADS_PATH} -prune ) ! ( -path #{EXCLUDED_TMP_UPLOADS_PATH} -prune ) -print0] + cmd = %W[find #{search_dir} + -type f + ! ( -path #{EXCLUDED_HASHED_UPLOADS_PATH} -prune ) + ! ( -path #{EXCLUDED_TMP_UPLOADS_PATH} -prune ) + -print0] ionice = which_ionice cmd = %W[#{ionice} -c Idle] + cmd if ionice - Rails.logger.info "PrepareUntrackedUploads find command: \"#{cmd.join(' ')}\"" + log_msg = "PrepareUntrackedUploads find command: \"#{cmd.join(' ')}\"" + Rails.logger.info log_msg cmd end @@ -98,25 +110,32 @@ module Gitlab def which_ionice Gitlab::Utils.which('ionice') rescue StandardError - # In this case, returning false is relatively safe, even though it isn't very nice + # In this case, returning false is relatively safe, + # even though it isn't very nice false end def insert_file_paths(file_paths) - sql = if postgresql_pre_9_5? - "INSERT INTO #{table_columns_and_values_for_insert(file_paths)};" - elsif postgresql? - "INSERT INTO #{table_columns_and_values_for_insert(file_paths)} ON CONFLICT DO NOTHING;" - else # MySQL - "INSERT IGNORE INTO #{table_columns_and_values_for_insert(file_paths)};" - end + sql = insert_sql(file_paths) ActiveRecord::Base.connection.execute(sql) end + def insert_sql(file_paths) + if postgresql_pre_9_5? + "INSERT INTO #{table_columns_and_values_for_insert(file_paths)};" + elsif postgresql? + "INSERT INTO #{table_columns_and_values_for_insert(file_paths)}"\ + " ON CONFLICT DO NOTHING;" + else # MySQL + "INSERT IGNORE INTO"\ + " #{table_columns_and_values_for_insert(file_paths)};" + end + end + def table_columns_and_values_for_insert(file_paths) values = file_paths.map do |file_path| - ActiveRecord::Base.send(:sanitize_sql_array, ['(?)', file_path]) # rubocop:disable GitlabSecurity/PublicSend + ActiveRecord::Base.send(:sanitize_sql_array, ['(?)', file_path]) # rubocop:disable GitlabSecurity/PublicSend, Metrics/LineLength end.join(', ') "#{UntrackedFile.table_name} (path) VALUES #{values}" @@ -131,11 +150,13 @@ module Gitlab end def postgresql_pre_9_5? - @postgresql_pre_9_5 ||= postgresql? && Gitlab::Database.version.to_f < 9.5 + @postgresql_pre_9_5 ||= postgresql? && + Gitlab::Database.version.to_f < 9.5 end def schedule_populate_untracked_uploads_jobs - bulk_queue_background_migration_jobs_by_range(UntrackedFile, FOLLOW_UP_MIGRATION) + bulk_queue_background_migration_jobs_by_range( + UntrackedFile, FOLLOW_UP_MIGRATION) end end end -- cgit v1.2.1