diff options
author | Kamil Trzciński <ayufan@ayufan.eu> | 2019-09-02 14:25:17 +0200 |
---|---|---|
committer | Kamil Trzciński <ayufan@ayufan.eu> | 2019-09-05 17:40:28 +0200 |
commit | c08d5a7046741b4278c39f542826efea7bfa3771 (patch) | |
tree | 74246307d246d9e97421dd520099eeb15081fa79 | |
parent | bbfbe50566a2ada9d65be013c1486292ee09c90b (diff) | |
download | gitlab-ce-kamil-import-export-constant-memory.tar.gz |
Make constant-memory export serializer (kamil-import-export-constant-memory)
This makes the serialization execute lazily,
using a memory-optimised approach
that is fast to recycle.
The biggest benefit comes from the fact that
storing the full Hash of an object's serialization is expensive,
as it also often results in holding on to the original
representations of the objects.
This change solves that by serializing in batches
and writing the exact raw JSON content to the generated string.
The JSON generator is optimised to have an efficient string
appender, and this change makes use of that.
-rw-r--r-- | lib/gitlab/import_export/fast_hash_serializer.rb | 43 | ||||
-rw-r--r-- | lib/gitlab/import_export/project_tree_saver.rb | 3 | ||||
-rw-r--r-- | lib/gitlab/import_export/relation_rename_service.rb | 4 |
3 files changed, 45 insertions, 5 deletions
diff --git a/lib/gitlab/import_export/fast_hash_serializer.rb b/lib/gitlab/import_export/fast_hash_serializer.rb index 6206840b472..aabd63c4aff 100644 --- a/lib/gitlab/import_export/fast_hash_serializer.rb +++ b/lib/gitlab/import_export/fast_hash_serializer.rb @@ -26,6 +26,45 @@ module Gitlab class FastHashSerializer attr_reader :subject, :tree + # Usage of this class results in delayed + # serialization of relation. The serialization + # will be triggered when the `JSON.generate` + # is exected. + # + # This class uses memory-optimised, lazily + # initialised, fast to recycle relation + # serialization. + # + # The `JSON.generate` does use `#to_json`, + # that returns raw JSON content that is written + # directly to file. + class JSONBatchRelation + def initialize(relation, options, preloads) + @relation = relation + @options = options + @preloads = preloads + end + + def to_json(options = {}) + result = String.new + items = 0 + + batch = @relation + batch = batch.preload(@preloads) if @preloads + batch.each do |item| + result.concat(",") unless result.empty? 
+ result.concat(item.to_json(@options)) + items += 1 + end + + result + end + + def as_json(*) + raise NotImplementedError + end + end + BATCH_SIZE = 100 def initialize(subject, tree, batch_size: BATCH_SIZE) @@ -85,12 +124,10 @@ module Gitlab return record.as_json(options) end - # has-many relation data = [] record.in_batches(of: @batch_size) do |batch| # rubocop:disable Cop/InBatches - batch = batch.preload(preloads[key]) if preloads&.key?(key) - data += batch.as_json(options) + data.append(JSONBatchRelation.new(batch, options, preloads[key])) end data diff --git a/lib/gitlab/import_export/project_tree_saver.rb b/lib/gitlab/import_export/project_tree_saver.rb index f75f69b2c75..b1a20dbd3d5 100644 --- a/lib/gitlab/import_export/project_tree_saver.rb +++ b/lib/gitlab/import_export/project_tree_saver.rb @@ -20,7 +20,8 @@ module Gitlab project_tree = serialize_project_tree fix_project_tree(project_tree) - File.write(full_path, project_tree.to_json) + project_tree_json = JSON.generate(project_tree) + File.write(full_path, project_tree_json) true rescue => e diff --git a/lib/gitlab/import_export/relation_rename_service.rb b/lib/gitlab/import_export/relation_rename_service.rb index 179bde5e21e..22813aeb83e 100644 --- a/lib/gitlab/import_export/relation_rename_service.rb +++ b/lib/gitlab/import_export/relation_rename_service.rb @@ -19,8 +19,10 @@ module Gitlab module ImportExport class RelationRenameService + # Each relation added will result in being + # exported twice, consuming twice amount of + # resources RENAMES = { - 'pipelines' => 'ci_pipelines' # Added in 11.6, remove in 11.7 }.freeze def self.rename(tree_hash) |