summaryrefslogtreecommitdiff
path: root/app/services/bulk_imports/archive_extraction_service.rb
blob: 9fc828b8e34551ce88ec09c3a5bcd32c7e52e6f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# frozen_string_literal: true

# Archive Extraction Service extracts the contents of `tar` archives,
# with additional handling (removal) of file symlinks.
#
# @param tmpdir [String] Path of the directory where the archive is located
# and into which its contents are extracted.
# This directory must be located under `Dir.tmpdir`;
# `BulkImports::Error` is raised if any other directory path is used.
#
# @param filename [String] Name of the file to extract contents from.
#
# @example
#   dir = Dir.mktmpdir
#   filename = 'things.tar'
#   BulkImports::ArchiveExtractionService.new(tmpdir: dir, filename: filename).execute
#   Dir.glob(File.join(dir, '**', '*'))
#   => ['/path/to/tmp/dir/extracted_file_1', '/path/to/tmp/dir/extracted_file_2', '/path/to/tmp/dir/extracted_file_3']
module BulkImports
  # Extracts a `tar` archive into the directory it lives in and then deletes
  # every symlink found among the extracted entries.
  class ArchiveExtractionService
    include Gitlab::ImportExport::CommandLineUtil

    # Pseudo-entries `Dir.glob` can report once `File::FNM_DOTMATCH` is set.
    DOT_ENTRIES = %w[. ..].freeze
    private_constant :DOT_ENTRIES

    # @param tmpdir [String] directory that holds the archive and receives its contents
    # @param filename [String] name of the archive file inside `tmpdir`
    def initialize(tmpdir:, filename:)
      @tmpdir = tmpdir
      @filename = filename
      @filepath = File.join(@tmpdir, @filename)
    end

    # Runs the validations, extracts the archive into `tmpdir` and removes
    # symlinks from the extracted tree.
    #
    # @return [String] the `tmpdir` the service was initialised with
    # @raise [BulkImports::Error] when `tmpdir` is not `Dir.tmpdir` or a path
    #   below it, or when the archive path itself is a symlink
    def execute
      validate_filepath
      validate_tmpdir
      validate_symlink

      extract_archive
      remove_symlinks
      tmpdir
    end

    private

    attr_reader :tmpdir, :filename, :filepath

    # Delegates to `Gitlab::Utils.check_path_traversal!`
    # (presumably raises when the path contains traversal segments - verify against that helper).
    def validate_filepath
      Gitlab::Utils.check_path_traversal!(filepath)
    end

    # Ensures the target directory is `Dir.tmpdir` itself or lives below it.
    #
    # The comparison is made against the allowed root *with a trailing
    # separator*: a bare prefix check would also accept sibling directories
    # such as `/tmp-evil` when `Dir.tmpdir` is `/tmp`.
    def validate_tmpdir
      allowed_root = File.expand_path(Dir.tmpdir)
      target = File.expand_path(tmpdir)

      return if target == allowed_root
      return if target.start_with?(File.join(allowed_root, ''))

      raise BulkImports::Error, 'Invalid target directory'
    end

    # Refuses to extract when the archive path is itself a symlink.
    def validate_symlink
      raise BulkImports::Error, 'Invalid file' if symlink?(filepath)
    end

    # @param path [String] path to inspect; `lstat` looks at the link itself
    #   rather than the file it points to
    # @return [Boolean] whether `path` is a symlink
    def symlink?(path)
      File.lstat(path).symlink?
    end

    # Unpacks the archive into `tmpdir` via `untar_xf`, which is expected to
    # come from `Gitlab::ImportExport::CommandLineUtil`.
    def extract_archive
      untar_xf(archive: filepath, dir: tmpdir)
    end

    # Lists every entry below `tmpdir`, including hidden ones.
    #
    # `File::FNM_DOTMATCH` is required so that dotfiles and the contents of
    # dot-directories are returned; without it a symlink named `.evil` (or one
    # placed under `.hidden/`) would never be inspected. The flag also makes
    # glob report `.` (and, on some Ruby versions, `..`) entries, which are
    # filtered out.
    #
    # @return [Array<String>] paths of the entries found below `tmpdir`
    def extracted_files
      Dir.glob(File.join(tmpdir, '**', '*'), File::FNM_DOTMATCH).reject do |path|
        DOT_ENTRIES.include?(File.basename(path))
      end
    end

    # Deletes each extracted entry that is a symlink.
    def remove_symlinks
      extracted_files.each do |path|
        FileUtils.rm(path) if symlink?(path)
      end
    end
  end
end