diff options
author | Sean McGivern <sean@mcgivern.me.uk> | 2018-03-05 11:16:17 +0000 |
---|---|---|
committer | Micaƫl Bergeron <mbergeron@gitlab.com> | 2018-03-08 10:20:40 -0500 |
commit | edbcde8877f497ea675fde811065679286a1aa56 (patch) | |
tree | beefe40d469f06fc72f2513f73b336115b0f6144 /lib | |
parent | 2387ef2b4a9944749f0cd1db5c7e7f55f3745780 (diff) | |
download | gitlab-ce-edbcde8877f497ea675fde811065679286a1aa56.tar.gz |
[cherry-pick] '4862-verify-file-checksums'
See merge request gitlab-org/gitlab-ee!4753
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/verify/batch_verifier.rb | 64 | ||||
-rw-r--r-- | lib/gitlab/verify/lfs_objects.rb | 27 | ||||
-rw-r--r-- | lib/gitlab/verify/rake_task.rb | 53 | ||||
-rw-r--r-- | lib/gitlab/verify/uploads.rb | 27 | ||||
-rw-r--r-- | lib/tasks/gitlab/lfs/check.rake | 8 | ||||
-rw-r--r-- | lib/tasks/gitlab/lfs/migrate.rake (renamed from lib/tasks/gitlab/lfs.rake) | 0 |
6 files changed, 179 insertions, 0 deletions
diff --git a/lib/gitlab/verify/batch_verifier.rb b/lib/gitlab/verify/batch_verifier.rb new file mode 100644 index 00000000000..1ef369a4b67 --- /dev/null +++ b/lib/gitlab/verify/batch_verifier.rb @@ -0,0 +1,64 @@ +module Gitlab + module Verify + class BatchVerifier + attr_reader :batch_size, :start, :finish + + def initialize(batch_size:, start: nil, finish: nil) + @batch_size = batch_size + @start = start + @finish = finish + end + + # Yields a Range of IDs and a Hash of failed verifications (object => error) + def run_batches(&blk) + relation.in_batches(of: batch_size, start: start, finish: finish) do |relation| # rubocop: disable Cop/InBatches + range = relation.first.id..relation.last.id + failures = run_batch(relation) + + yield(range, failures) + end + end + + def name + raise NotImplementedError.new + end + + def describe(_object) + raise NotImplementedError.new + end + + private + + def run_batch(relation) + relation.map { |upload| verify(upload) }.compact.to_h + end + + def verify(object) + expected = expected_checksum(object) + actual = actual_checksum(object) + + raise 'Checksum missing' unless expected.present? + raise 'Checksum mismatch' unless expected == actual + + nil + rescue => err + [object, err] + end + + # This should return an ActiveRecord::Relation suitable for calling #in_batches on + def relation + raise NotImplementedError.new + end + + # The checksum we expect the object to have + def expected_checksum(_object) + raise NotImplementedError.new + end + + # The freshly-recalculated checksum of the object + def actual_checksum(_object) + raise NotImplementedError.new + end + end + end +end diff --git a/lib/gitlab/verify/lfs_objects.rb b/lib/gitlab/verify/lfs_objects.rb new file mode 100644 index 00000000000..970e2a7b718 --- /dev/null +++ b/lib/gitlab/verify/lfs_objects.rb @@ -0,0 +1,27 @@ +module Gitlab + module Verify + class LfsObjects < BatchVerifier + def name + 'LFS objects' + end + + def describe(object) + "LFS object: #{object.oid}" + end + + private + + def relation + LfsObject.with_files_stored_locally + end + + def expected_checksum(lfs_object) + lfs_object.oid + end + + def actual_checksum(lfs_object) + LfsObject.calculate_oid(lfs_object.file.path) + end + end + end +end diff --git a/lib/gitlab/verify/rake_task.rb b/lib/gitlab/verify/rake_task.rb new file mode 100644 index 00000000000..dd138e6b92b --- /dev/null +++ b/lib/gitlab/verify/rake_task.rb @@ -0,0 +1,53 @@ +module Gitlab + module Verify + class RakeTask + def self.run!(verify_kls) + verifier = verify_kls.new( + batch_size: ENV.fetch('BATCH', 200).to_i, + start: ENV['ID_FROM'], + finish: ENV['ID_TO'] + ) + + verbose = Gitlab::Utils.to_boolean(ENV['VERBOSE']) + + new(verifier, verbose).run! + end + + attr_reader :verifier, :output + + def initialize(verifier, verbose) + @verifier = verifier + @verbose = verbose + end + + def run! + say "Checking integrity of #{verifier.name}" + + verifier.run_batches { |*args| run_batch(*args) } + + say 'Done!' + end + + def verbose? + !!@verbose + end + + private + + def say(text) + puts(text) # rubocop:disable Rails/Output + end + + def run_batch(range, failures) + status_color = failures.empty? ? :green : :red + say "- #{range}: Failures: #{failures.count}".color(status_color) + + return unless verbose? + + failures.each do |object, error| + say " - #{verifier.describe(object)}: #{error.inspect}".color(:red) + end + end + end + end +end diff --git a/lib/gitlab/verify/uploads.rb b/lib/gitlab/verify/uploads.rb new file mode 100644 index 00000000000..0ffa71a6d72 --- /dev/null +++ b/lib/gitlab/verify/uploads.rb @@ -0,0 +1,27 @@ +module Gitlab + module Verify + class Uploads < BatchVerifier + def name + 'Uploads' + end + + def describe(object) + "Upload: #{object.id}" + end + + private + + def relation + Upload.with_files_stored_locally + end + + def expected_checksum(upload) + upload.checksum + end + + def actual_checksum(upload) + Upload.hexdigest(upload.absolute_path) + end + end + end +end diff --git a/lib/tasks/gitlab/lfs/check.rake b/lib/tasks/gitlab/lfs/check.rake new file mode 100644 index 00000000000..869463d4e5d --- /dev/null +++ b/lib/tasks/gitlab/lfs/check.rake @@ -0,0 +1,8 @@ +namespace :gitlab do + namespace :lfs do + desc 'GitLab | LFS | Check integrity of uploaded LFS objects' + task check: :environment do + Gitlab::Verify::RakeTask.run!(Gitlab::Verify::LfsObjects) + end + end +end diff --git a/lib/tasks/gitlab/lfs.rake b/lib/tasks/gitlab/lfs/migrate.rake index a45e5ca91e0..a45e5ca91e0 100644 --- a/lib/tasks/gitlab/lfs.rake +++ b/lib/tasks/gitlab/lfs/migrate.rake |