summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKamil Trzciński <ayufan@ayufan.eu>2019-09-02 14:25:17 +0200
committerKamil Trzciński <ayufan@ayufan.eu>2019-09-05 17:40:28 +0200
commitc08d5a7046741b4278c39f542826efea7bfa3771 (patch)
tree74246307d246d9e97421dd520099eeb15081fa79
parentbbfbe50566a2ada9d65be013c1486292ee09c90b (diff)
downloadgitlab-ce-kamil-import-export-constant-memory.tar.gz
Make constant-memory export serializerkamil-import-export-constant-memory
This makes serialization execute lazily, using a memory-optimised approach that is fast to recycle. The biggest benefit comes from the fact that storing the full Hash of an object's serialization is expensive, as it often also results in holding on to the original representations of the objects. This change solves that by serializing in batches, outputting exact raw JSON content to the generated string. The JSON generator is optimised to have an efficient string appender, and this change makes use of that.
-rw-r--r--lib/gitlab/import_export/fast_hash_serializer.rb43
-rw-r--r--lib/gitlab/import_export/project_tree_saver.rb3
-rw-r--r--lib/gitlab/import_export/relation_rename_service.rb4
3 files changed, 45 insertions, 5 deletions
diff --git a/lib/gitlab/import_export/fast_hash_serializer.rb b/lib/gitlab/import_export/fast_hash_serializer.rb
index 6206840b472..aabd63c4aff 100644
--- a/lib/gitlab/import_export/fast_hash_serializer.rb
+++ b/lib/gitlab/import_export/fast_hash_serializer.rb
@@ -26,6 +26,45 @@ module Gitlab
class FastHashSerializer
attr_reader :subject, :tree
+ # Usage of this class results in delayed
+ # serialization of the relation. The serialization
+ # is triggered when `JSON.generate`
+ # is executed.
+ #
+ # This class uses memory-optimised, lazily
+ # initialised, fast to recycle relation
+ # serialization.
+ #
+ # The `JSON.generate` does use `#to_json`,
+ # that returns raw JSON content that is written
+ # directly to file.
+ class JSONBatchRelation
+ def initialize(relation, options, preloads)
+ @relation = relation
+ @options = options
+ @preloads = preloads
+ end
+
+ def to_json(options = {})
+ result = String.new
+ items = 0
+
+ batch = @relation
+ batch = batch.preload(@preloads) if @preloads
+ batch.each do |item|
+ result.concat(",") unless result.empty?
+ result.concat(item.to_json(@options))
+ items += 1
+ end
+
+ result
+ end
+
+ def as_json(*)
+ raise NotImplementedError
+ end
+ end
+
BATCH_SIZE = 100
def initialize(subject, tree, batch_size: BATCH_SIZE)
@@ -85,12 +124,10 @@ module Gitlab
return record.as_json(options)
end
- # has-many relation
data = []
record.in_batches(of: @batch_size) do |batch| # rubocop:disable Cop/InBatches
- batch = batch.preload(preloads[key]) if preloads&.key?(key)
- data += batch.as_json(options)
+ data.append(JSONBatchRelation.new(batch, options, preloads[key]))
end
data
diff --git a/lib/gitlab/import_export/project_tree_saver.rb b/lib/gitlab/import_export/project_tree_saver.rb
index f75f69b2c75..b1a20dbd3d5 100644
--- a/lib/gitlab/import_export/project_tree_saver.rb
+++ b/lib/gitlab/import_export/project_tree_saver.rb
@@ -20,7 +20,8 @@ module Gitlab
project_tree = serialize_project_tree
fix_project_tree(project_tree)
- File.write(full_path, project_tree.to_json)
+ project_tree_json = JSON.generate(project_tree)
+ File.write(full_path, project_tree_json)
true
rescue => e
diff --git a/lib/gitlab/import_export/relation_rename_service.rb b/lib/gitlab/import_export/relation_rename_service.rb
index 179bde5e21e..22813aeb83e 100644
--- a/lib/gitlab/import_export/relation_rename_service.rb
+++ b/lib/gitlab/import_export/relation_rename_service.rb
@@ -19,8 +19,10 @@
module Gitlab
module ImportExport
class RelationRenameService
+ # Each relation added will be
+ # exported twice, consuming twice the amount of
+ # resources.
RENAMES = {
- 'pipelines' => 'ci_pipelines' # Added in 11.6, remove in 11.7
}.freeze
def self.rename(tree_hash)