1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
|
# frozen_string_literal: true
# Import large project archives
#
# This task:
# 1. Disables ObjectStorage for archive upload
# 2. Performs Sidekiq job synchronously
#
# @example
# bundle exec rake "gitlab:import_export:import[root, root, imported_project, /path/to/file.tar.gz, true]"
#
namespace :gitlab do
namespace :import_export do
desc 'GitLab | Import/Export | EXPERIMENTAL | Import large project archives'
task :import, [:username, :namespace_path, :project_path, :archive_path, :measurement_enabled] => :gitlab_environment do |_t, args|
# Load it here to avoid polluting Rake tasks with Sidekiq test warnings
require 'sidekiq/testing'
warn_user_is_not_gitlab
if ENV['IMPORT_DEBUG'].present?
ActiveRecord::Base.logger = Logger.new(STDOUT)
end
GitlabProjectImport.new(
namespace_path: args.namespace_path,
project_path: args.project_path,
username: args.username,
file_path: args.archive_path,
measurement_enabled: args.measurement_enabled == 'true'
).import
end
end
end
class GitlabProjectImport
def initialize(opts)
@project_path = opts.fetch(:project_path)
@file_path = opts.fetch(:file_path)
@namespace = Namespace.find_by_full_path(opts.fetch(:namespace_path))
@current_user = User.find_by_username(opts.fetch(:username))
@measurement_enabled = opts.fetch(:measurement_enabled)
end
def import
show_import_start_message
run_isolated_sidekiq_job
show_import_failures_count
if @project&.import_state&.last_error
puts "ERROR: #{@project.import_state.last_error}"
exit 1
elsif @project.errors.any?
puts "ERROR: #{@project.errors.full_messages.join(', ')}"
exit 1
else
puts 'Done!'
end
rescue StandardError => e
puts "Exception: #{e.message}"
puts e.backtrace
exit 1
end
private
def with_request_store
RequestStore.begin!
yield
ensure
RequestStore.end!
RequestStore.clear!
end
def with_count_queries(&block)
count = 0
counter_f = ->(name, started, finished, unique_id, payload) {
unless payload[:name].in? %w[CACHE SCHEMA]
count += 1
end
}
ActiveSupport::Notifications.subscribed(counter_f, "sql.active_record", &block)
puts "Number of sql calls: #{count}"
end
def with_gc_counter
gc_counts_before = GC.stat.select { |k, v| k =~ /count/ }
yield
gc_counts_after = GC.stat.select { |k, v| k =~ /count/ }
stats = gc_counts_before.merge(gc_counts_after) { |k, vb, va| va - vb }
puts "Total GC count: #{stats[:count]}"
puts "Minor GC count: #{stats[:minor_gc_count]}"
puts "Major GC count: #{stats[:major_gc_count]}"
end
def with_measure_time
timing = Benchmark.realtime do
yield
end
time = Time.at(timing).utc.strftime("%H:%M:%S")
puts "Time to finish: #{time}"
end
def with_measuring
puts "Measuring enabled..."
with_gc_counter do
with_count_queries do
with_measure_time do
yield
end
end
end
end
def measurement_enabled?
@measurement_enabled != false
end
# We want to ensure that all Sidekiq jobs are executed
# synchronously as part of that process.
# This ensures that all expensive operations do not escape
# to general Sidekiq clusters/nodes.
def with_isolated_sidekiq_job
Sidekiq::Testing.fake! do
with_request_store do
# If you are attempting to import a large project into a development environment,
# you may see Gitaly throw an error about too many calls or invocations.
# This is due to a n+1 calls limit being set for development setups (not enforced in production)
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/24475#note_283090635
# For development setups, this code-path will be excluded from n+1 detection.
::Gitlab::GitalyClient.allow_n_plus_1_calls do
measurement_enabled? ? with_measuring { yield } : yield
end
end
true
end
end
def run_isolated_sidekiq_job
with_isolated_sidekiq_job do
@project = create_project
execute_sidekiq_job
end
end
def create_project
# We are disabling ObjectStorage for `import`
# as it is too slow to handle big archives:
# 1. DB transaction timeouts on upload
# 2. Download of archive before unpacking
disable_upload_object_storage do
service = Projects::GitlabProjectsImportService.new(
@current_user,
{
namespace_id: @namespace.id,
path: @project_path,
file: File.open(@file_path)
}
)
service.execute
end
end
def execute_sidekiq_job
Sidekiq::Worker.drain_all
end
def disable_upload_object_storage
overwrite_uploads_setting('background_upload', false) do
overwrite_uploads_setting('direct_upload', false) do
yield
end
end
end
def overwrite_uploads_setting(key, value)
old_value = Settings.uploads.object_store[key]
Settings.uploads.object_store[key] = value
yield
ensure
Settings.uploads.object_store[key] = old_value
end
def full_path
"#{@namespace.full_path}/#{@project_path}"
end
def show_import_start_message
puts "Importing GitLab export: #{@file_path} into GitLab" \
" #{full_path}" \
" as #{@current_user.name}"
end
def show_import_failures_count
return unless @project.import_failures.exists?
puts "Total number of not imported relations: #{@project.import_failures.count}"
end
end
|