# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    module PopulateUntrackedUploadsDependencies
      # This class is responsible for producing the attributes necessary to
      # track an uploaded file in the `uploads` table.
      class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength
        self.table_name = 'untracked_files_for_uploads'

        # Ends with /:random_hex/:filename
        FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}.freeze

        # Captures everything that precedes the /:random_hex/:filename suffix.
        FULL_PATH_CAPTURE = /\A(.+)#{FILE_UPLOADER_PATH}/.freeze

        # These regex patterns are tested against a relative path, relative to
        # the upload directory.
        # For convenience, if there exists a capture group in the pattern, then
        # it indicates the model_id.
        # Order matters: the first matching entry wins, and the FileUploader
        # entry has no leading anchor, so it must stay last.
        PATH_PATTERNS = [
          {
            pattern: %r{\A-/system/appearance/logo/(\d+)/},
            uploader: 'AttachmentUploader',
            model_type: 'Appearance'
          },
          {
            pattern: %r{\A-/system/appearance/header_logo/(\d+)/},
            uploader: 'AttachmentUploader',
            model_type: 'Appearance'
          },
          {
            pattern: %r{\A-/system/note/attachment/(\d+)/},
            uploader: 'AttachmentUploader',
            model_type: 'Note'
          },
          {
            pattern: %r{\A-/system/user/avatar/(\d+)/},
            uploader: 'AvatarUploader',
            model_type: 'User'
          },
          {
            pattern: %r{\A-/system/group/avatar/(\d+)/},
            uploader: 'AvatarUploader',
            model_type: 'Namespace'
          },
          {
            pattern: %r{\A-/system/project/avatar/(\d+)/},
            uploader: 'AvatarUploader',
            model_type: 'Project'
          },
          {
            pattern: FILE_UPLOADER_PATH,
            uploader: 'FileUploader',
            model_type: 'Project'
          }
        ].freeze

        # Returns the attribute hash for this file (path, uploader, model_type,
        # model_id, size, checksum). The hash is built once and memoized.
        def to_h
          @upload_hash ||= {
            path: upload_path,
            uploader: uploader,
            model_type: model_type,
            model_id: model_id,
            size: file_size,
            checksum: checksum
          }
        end

        # Returns the path to record for this file.
        #
        # For FileUploader files this is the trailing ":random_hex/:filename"
        # portion without its leading slash; for every other uploader it is
        # `path` unchanged.
        def upload_path
          # UntrackedFile#path is absolute, but Upload#path depends on uploader
          @upload_path ||=
            if uploader == 'FileUploader'
              # Path relative to project directory in uploads
              matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)
              matchd[0].sub(%r{\A/}, '') # remove leading slash
            else
              path
            end
        end

        # Uploader class name taken from the matching PATH_PATTERNS entry.
        def uploader
          matching_pattern_map[:uploader]
        end

        # Model class name taken from the matching PATH_PATTERNS entry.
        def model_type
          matching_pattern_map[:model_type]
        end

        # Returns the ID of the model owning this file, or nil when a
        # FileUploader path cannot be resolved to a project.
        #
        # Memoized with `defined?` because nil is a legitimate result that
        # `||=` would recompute on every call.
        def model_id
          return @model_id if defined?(@model_id)

          pattern = matching_pattern_map[:pattern]
          matchd = path_relative_to_upload_dir.match(pattern)

          # If something is captured (matchd[1] is not nil), it is a model_id
          # Only the FileUploader pattern will not match an ID
          @model_id = matchd[1] ? matchd[1].to_i : file_uploader_model_id
        end

        # Size of the file at `absolute_path`, as reported by File.size.
        def file_size
          File.size(absolute_path)
        end

        # Hex-encoded SHA256 digest of the file at `absolute_path`.
        def checksum
          Digest::SHA256.file(absolute_path).hexdigest
        end

        private

        # Returns the first PATH_PATTERNS entry whose pattern matches the
        # relative path. Raises a RuntimeError when none matches.
        def matching_pattern_map
          @matching_pattern_map ||= PATH_PATTERNS.find do |path_pattern_map|
            path_relative_to_upload_dir.match(path_pattern_map[:pattern])
          end

          unless @matching_pattern_map
            raise "Unknown upload path pattern \"#{path}\""
          end

          @matching_pattern_map
        end

        # Resolves the project ID for a FileUploader path by looking the
        # project up by the full_path that precedes /:random_hex/:filename.
        #
        # Returns nil when no project has that full_path. Raises a
        # RuntimeError when no full_path can be captured from the path.
        def file_uploader_model_id
          matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)

          # The message is only built on the failure path.
          unless matchd
            raise <<~MSG
              Could not capture project full_path from a FileUploader path:
                "#{path_relative_to_upload_dir}"
            MSG
          end

          full_path = matchd[1]
          project = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Project.find_by_full_path(full_path)

          project&.id
        end

        # Not including a leading slash
        #
        # The constant lookup and regex construction live inside the memoized
        # expression so they run once per record rather than on every call
        # (this method is invoked once per pattern while matching).
        def path_relative_to_upload_dir
          @path_relative_to_upload_dir ||= begin
            upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR
            path.sub(%r{\A#{Regexp.escape(upload_dir)}/}, '')
          end
        end

        # `path` joined onto the configured uploads storage path.
        def absolute_path
          File.join(Gitlab.config.uploads.storage_path, path)
        end
      end

      # Avoid using application code
      class Upload < ActiveRecord::Base
        self.table_name = 'uploads'
      end

      # Avoid using application code
      class Appearance < ActiveRecord::Base
        self.table_name = 'appearances'
      end

      # Avoid using application code
      class Namespace < ActiveRecord::Base
        self.table_name = 'namespaces'
      end

      # Avoid using application code
      class Note < ActiveRecord::Base
        self.table_name = 'notes'
      end

      # Avoid using application code
      class User < ActiveRecord::Base
        self.table_name = 'users'
      end

      # Since project Markdown upload paths don't contain the project ID, we
      # have to find the project by its full_path. Due to MySQL/PostgreSQL
      # differences, and historical reasons, the logic is somewhat complex, so
      # it has mostly been copied in here.
      class Project < ActiveRecord::Base
        self.table_name = 'projects'

        # Returns one project whose route path matches `path`, or nil.
        #
        # Candidates come from `where_full_path_in`; rows for which the
        # `routes.path = <path>` comparison holds (prefixed with BINARY on
        # MySQL) are ordered first.
        def self.find_by_full_path(path)
          binary = Gitlab::Database.mysql? ? 'BINARY' : ''

          # Wrapped in Arel.sql to mark the expression as intentionally raw
          # SQL; the only interpolated value is escaped by connection.quote.
          order_sql = Arel.sql(
            "(CASE WHEN #{binary} routes.path = #{connection.quote(path)} THEN 0 ELSE 1 END)"
          )

          where_full_path_in(path).reorder(order_sql).take
        end

        # Returns a relation of projects joined to their route whose
        # routes.path equals `path`. On PostgreSQL both sides are wrapped in
        # LOWER(); otherwise a plain `=` comparison is used.
        def self.where_full_path_in(path)
          quoted_path = connection.quote(path)

          condition =
            if Gitlab::Database.postgresql?
              "(LOWER(routes.path) = LOWER(#{quoted_path}))"
            else
              "(routes.path = #{quoted_path})"
            end

          joins("INNER JOIN routes ON routes.source_id = projects.id AND routes.source_type = 'Project'").where(condition)
        end
      end
    end
  end
end