diff options
Diffstat (limited to 'lib/gitlab/cleanup/orphan_job_artifact_files.rb')
-rw-r--r-- | lib/gitlab/cleanup/orphan_job_artifact_files.rb | 132 |
1 files changed, 132 insertions, 0 deletions
diff --git a/lib/gitlab/cleanup/orphan_job_artifact_files.rb b/lib/gitlab/cleanup/orphan_job_artifact_files.rb new file mode 100644 index 00000000000..ee7164b3e55 --- /dev/null +++ b/lib/gitlab/cleanup/orphan_job_artifact_files.rb @@ -0,0 +1,132 @@ +# frozen_string_literal: true + +module Gitlab + module Cleanup + class OrphanJobArtifactFiles + include Gitlab::Utils::StrongMemoize + + ABSOLUTE_ARTIFACT_DIR = ::JobArtifactUploader.root.freeze + LOST_AND_FOUND = File.join(ABSOLUTE_ARTIFACT_DIR, '-', 'lost+found').freeze + BATCH_SIZE = 500 + DEFAULT_NICENESS = 'Best-effort' + + attr_accessor :batch, :total_found, :total_cleaned + attr_reader :limit, :dry_run, :niceness, :logger + + def initialize(limit: nil, dry_run: true, niceness: nil, logger: nil) + @limit = limit + @dry_run = dry_run + @niceness = niceness || DEFAULT_NICENESS + @logger = logger || Rails.logger + @total_found = @total_cleaned = 0 + + new_batch! + end + + def run! + log_info('Looking for orphan job artifacts to clean up') + + find_artifacts do |artifact_file| + batch << artifact_file + + clean_batch! if batch.full? + break if limit_reached? + end + + clean_batch! + + log_info("Processed #{total_found} job artifacts to find and clean #{total_cleaned} orphans.") + end + + private + + def new_batch! + self.batch = ::Gitlab::Cleanup::OrphanJobArtifactFilesBatch + .new(batch_size: batch_size, logger: logger, dry_run: dry_run) + end + + def clean_batch! + batch.clean! + + update_stats!(batch) + + new_batch! + end + + def update_stats!(batch) + self.total_found += batch.artifact_files.count + self.total_cleaned += batch.lost_and_found.count + end + + def limit_reached? + return false unless limit + + total_cleaned >= limit + end + + def batch_size + return BATCH_SIZE unless limit + return if limit_reached? + + todo = limit - total_cleaned + [BATCH_SIZE, todo].min + end + + def find_artifacts + Open3.popen3(*find_command) do |stdin, stdout, stderr, status_thread| + stdout.each_line do |line| + yield line + end + + log_error(stderr.read.color(:red)) unless status_thread.value.success? + end + end + + def find_command + strong_memoize(:find_command) do + cmd = %W[find -L #{absolute_artifact_dir}] + + # Search for Job Artifact IDs, they are found 6 directory + # levels deep. For example: + # shared/artifacts/2c/62/2c...a3/2019_02_27/836/628/job.log + # 1 2 3 4 5 6 + # | | | ^- date | ^- Job Artifact ID + # | | | ^- Job ID + # ^--+--+- components of hashed storage project path + cmd += %w[-mindepth 6 -maxdepth 6] + + # Artifact directories are named on their ID + cmd += %w[-type d] + + if ionice + raise ArgumentError, 'Invalid niceness' unless niceness.match?(/^\w[\w\-]*$/) + + cmd.unshift(*%W[#{ionice} --class #{niceness}]) + end + + log_info("find command: '#{cmd.join(' ')}'") + + cmd + end + end + + def absolute_artifact_dir + File.absolute_path(ABSOLUTE_ARTIFACT_DIR) + end + + def ionice + strong_memoize(:ionice) do + Gitlab::Utils.which('ionice') + end + end + + def log_info(msg, params = {}) + logger.info("#{'[DRY RUN]' if dry_run} #{msg}") + end + + def log_error(msg, params = {}) + logger.error(msg) + end + end + end +end |