summaryrefslogtreecommitdiff
path: root/lib/bulk_imports/common/extractors/ndjson_extractor.rb
blob: 788d10ca364e9badca4156908b169c98aed72263 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# frozen_string_literal: true

module BulkImports
  module Common
    module Extractors
      class NdjsonExtractor
        include Gitlab::ImportExport::CommandLineUtil
        include Gitlab::Utils::StrongMemoize

        FILE_SIZE_LIMIT = 5.gigabytes
        ALLOWED_CONTENT_TYPES = %w(application/gzip application/octet-stream).freeze
        EXPORT_DOWNLOAD_URL_PATH = "/%{resource}/%{full_path}/export_relations/download?relation=%{relation}"

        def initialize(relation:)
          @relation = relation
          @tmp_dir = Dir.mktmpdir
        end

        def extract(context)
          download_service(tmp_dir, context).execute
          decompression_service(tmp_dir).execute
          relations = ndjson_reader(tmp_dir).consume_relation('', relation)

          BulkImports::Pipeline::ExtractedData.new(data: relations)
        end

        def remove_tmp_dir
          FileUtils.remove_entry(tmp_dir)
        end

        private

        attr_reader :relation, :tmp_dir

        def filename
          @filename ||= "#{relation}.ndjson.gz"
        end

        def download_service(tmp_dir, context)
          @download_service ||= BulkImports::FileDownloadService.new(
            configuration: context.configuration,
            relative_url: relative_resource_url(context),
            dir: tmp_dir,
            filename: filename,
            file_size_limit: FILE_SIZE_LIMIT,
            allowed_content_types: ALLOWED_CONTENT_TYPES
          )
        end

        def decompression_service(tmp_dir)
          @decompression_service ||= BulkImports::FileDecompressionService.new(
            dir: tmp_dir,
            filename: filename
          )
        end

        def ndjson_reader(tmp_dir)
          @ndjson_reader ||= Gitlab::ImportExport::Json::NdjsonReader.new(tmp_dir)
        end

        def relative_resource_url(context)
          strong_memoize(:relative_resource_url) do
            resource = context.portable.class.name.downcase.pluralize
            encoded_full_path = context.entity.encoded_source_full_path

            EXPORT_DOWNLOAD_URL_PATH % { resource: resource, full_path: encoded_full_path, relation: relation }
          end
        end
      end
    end
  end
end