summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorMichael Kozono <mkozono@gmail.com>2017-11-23 23:12:24 -0800
committerMichael Kozono <mkozono@gmail.com>2017-12-01 15:26:42 -0800
commit908aacddda571bc82c722798f399fd5a68ebe1da (patch)
tree1a072420766c29efe93578e7b91933fb330e2b3a /lib
parent7549d17f721b3be84f83c1dfa491d6a2ebf4ec28 (diff)
downloadgitlab-ce-908aacddda571bc82c722798f399fd5a68ebe1da.tar.gz
Filter existing uploads with one query
Diffstat (limited to 'lib')
-rw-r--r--lib/gitlab/background_migration/populate_untracked_uploads.rb80
1 files changed, 42 insertions, 38 deletions
diff --git a/lib/gitlab/background_migration/populate_untracked_uploads.rb b/lib/gitlab/background_migration/populate_untracked_uploads.rb
index 41dc5a3ed7e..8d6da0a7a67 100644
--- a/lib/gitlab/background_migration/populate_untracked_uploads.rb
+++ b/lib/gitlab/background_migration/populate_untracked_uploads.rb
@@ -50,37 +50,25 @@ module Gitlab
}
].freeze
- def ensure_tracked!
- add_to_uploads_if_needed
-
- delete
- end
-
- def add_to_uploads_if_needed
- # Even though we are checking relative paths, path is enough to
- # uniquely identify uploads. There is no ambiguity between
- # FileUploader paths and other Uploader paths because we use the /-/
- # separator kind of like an escape character. Project full_path will
- # never conflict with an upload path starting with "uploads/-/".
- Upload.
- where(path: upload_path).
- first_or_create!(
- uploader: uploader,
- model_type: model_type,
- model_id: model_id,
- size: file_size
- )
+ def to_h
+ {
+ path: upload_path,
+ uploader: uploader,
+ model_type: model_type,
+ model_id: model_id,
+ size: file_size
+ }
end
def upload_path
# UntrackedFile#path is absolute, but Upload#path depends on uploader
- if uploader == 'FileUploader'
- # Path relative to project directory in uploads
- matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN)
- matchd[0].sub(%r{\A/}, '') # remove leading slash
- else
- path
- end
+ @upload_path ||= if uploader == 'FileUploader'
+ # Path relative to project directory in uploads
+ matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN)
+ matchd[0].sub(%r{\A/}, '') # remove leading slash
+ else
+ path
+ end
end
def uploader
@@ -187,17 +175,8 @@ module Gitlab
return unless migrate?
files = UntrackedFile.where(id: start_id..end_id)
- files.each do |untracked_file|
- begin
- untracked_file.ensure_tracked!
- rescue StandardError => e
- Rails.logger.warn "Failed to add untracked file to uploads: #{e.message}"
-
- # The untracked rows will remain in the DB. We will be able to see
- # which ones failed to become tracked, and then we can decide what
- # to do.
- end
- end
+ insert_uploads_if_needed(files)
+ files.delete_all
drop_temp_table_if_finished
end
@@ -208,6 +187,31 @@ module Gitlab
UntrackedFile.table_exists? && Upload.table_exists?
end
+ def insert_uploads_if_needed(files)
+ filtered_files = filter_existing_uploads(files)
+ filtered_files = filter_deleted_models(filtered_files)
+ insert(filtered_files)
+ end
+
+ def filter_existing_uploads(files)
+ paths = files.map(&:upload_path)
+ existing_paths = Upload.where(path: paths).pluck(:path).to_set
+
+ files.reject do |file|
+ existing_paths.include?(file.upload_path)
+ end
+ end
+
+ def filter_deleted_models(files)
+ files # TODO
+ end
+
+ def insert(files)
+ files.each do |file|
+ Upload.create!(file.to_h)
+ end
+ end
+
def drop_temp_table_if_finished
UntrackedFile.connection.drop_table(:untracked_files_for_uploads) if UntrackedFile.all.empty?
end