1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
|
#
# Author:: Daniel DeLeo (<dan@chef.io>)
# Copyright:: Copyright (c) Chef Software Inc.
# License:: Apache License, Version 2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
require_relative "../spec_helper"
require "chef/client"
describe Chef::RunLock do
# This behavior works on windows, but the tests use fork :(
describe "when locking the chef-client run", unix_only: true do
##
# Lockfile location and helpers
# Name of a scratch directory under the system temp dir. The global random
# number generator is reseeded from the clock and the pid before the
# directory suffix is drawn.
let(:random_temp_root) do
  Kernel.srand(Time.now.to_i + Process.pid)
  suffix = Kernel.rand(Time.now.to_i + Process.pid)
  "#{Dir.tmpdir}/#{suffix}"
end

# Lockfile path nested several directories below the scratch root.
let(:lockfile) do
  "#{random_temp_root}/this/long/path/does/not/exist/chef-client-running.pid"
end
# Make sure every example starts with a clean slate and leaves one behind:
# the scratch root is removed (when present) both before and after each example.
before(:each) do
  log_event("rm -rf before")
  FileUtils.rm_r(random_temp_root) if File.exist?(random_temp_root)
end

after(:each) do
  log_event("rm -rf after")
  FileUtils.rm_r(random_temp_root) if File.exist?(random_temp_root)
end
# Records one entry in the event log as a [message, time] pair.
#
# message - text describing what happened
# time    - timestamp string; defaults to the current wall-clock time
#           formatted as HH:MM:SS.mmm
def log_event(message, time = Time.now.strftime("%H:%M:%S.%L"))
  events.push([message, time])
end

# The event log: created empty on first use, then the same array is
# returned on every later call.
def events
  @events = [] unless @events
  @events
end
# Seconds wait_on_lock gives the forked process to report in.
WAIT_ON_LOCK_TIME = 1.0

# Blocks until one line arrives on +from_fork+ and returns it.
#
# Raises a RuntimeError when nothing has been read within
# WAIT_ON_LOCK_TIME seconds.
def wait_on_lock(from_fork)
  Timeout.timeout(WAIT_ON_LOCK_TIME) { from_fork.readline }
rescue Timeout::Error
  raise "Lockfile never created, abandoning test"
end
# Upper bound, in seconds, passed to Timeout.timeout by ClientProcess.
CLIENT_PROCESS_TIMEOUT = 10
# Seconds the examples sleep before checking that a client is still waiting.
BREATHING_ROOM = 1

# ClientProcess is defined below
let!(:p1) do
  ClientProcess.new(self, "p1")
end

let!(:p2) do
  ClientProcess.new(self, "p2")
end
# Stops both client processes after every example. An error raised while
# stopping is stored on the example and re-raised; whenever the example has
# an exception, the collected event log is printed.
after(:each) do |example|
  begin
    p1.stop
    p2.stop
  rescue => error
    example.exception = error
    raise
  ensure
    print_events if example.exception
  end
end
# Prints the whole event log, one "<time> <message>" line per entry, ordered
# by timestamp; entries with equal timestamps keep their logging order.
def print_events
  # Consume any remaining events that went on the channel and print them all
  p1.last_event
  p2.last_event
  chronological = events.each_with_index.sort_by { |entry, position| [entry[1], position] }
  chronological.each do |(message, time), _position|
    print "#{time} #{message}\n"
  end
end
context "when the lockfile does not already exist" do
context "when a client creates the lockfile but has not yet acquired the lock" do
  # p1 is paused right after it reports the "created lock" event.
  before do
    p1.run_to("created lock")
  end

  # Examples run once for each of the p2 starting states set up below.
  shared_context "second client gets the lock" do
    it "the lockfile is created" do
      log_event("lockfile exists? #{File.exist?(lockfile)}")
      expect(File.exist?(lockfile)).to be_truthy
    end

    it "the lockfile is not locked" do
      probe = Chef::RunLock.new(lockfile)
      begin
        expect(probe.test).to be_truthy
      ensure
        probe.release
      end
    end

    it "the lockfile is empty" do
      expect(IO.read(lockfile)).to eq("")
    end

    context "and a second client gets the lock" do
      before do
        p2.run_to("acquired lock")
      end

      it "the first client does not get the lock until the second finishes" do
        # p2 runs to completion while p1 has been told to go for the lock.
        p1.run_to("acquired lock") { p2.run_to_completion }
      end

      it "and the first client tries to get the lock and the second is killed, the first client gets the lock immediately" do
        p1.run_to("acquired lock") do
          sleep(BREATHING_ROOM)
          expect(p1.last_event).to match(/after (started|created lock)/)
          p2.stop
        end
        p1.run_to_completion
      end
    end
  end

  context "and the second client has done nothing" do
    include_context "second client gets the lock"
  end

  context "and the second client has created the lockfile but not yet acquired the lock" do
    before do
      p2.run_to("created lock")
    end

    include_context "second client gets the lock"
  end
end
context "when a client acquires the lock but has not yet saved the pid" do
  # p1 is paused right after it reports the "acquired lock" event.
  before { p1.run_to("acquired lock") }

  it "the lockfile is created" do
    log_event("lockfile exists? #{File.exist?(lockfile)}")
    expect(File.exist?(lockfile)).to be_truthy
  end

  it "the lockfile is locked" do
    run_lock = Chef::RunLock.new(lockfile)
    begin
      expect(run_lock.test).to be_falsey
    ensure
      run_lock.release
    end
  end

  it "sets FD_CLOEXEC on the lockfile", supports_cloexec: true do
    # Block form of File.open: the descriptor is closed when the block exits,
    # even if the expectation fails, instead of leaking for the rest of the run.
    File.open(lockfile) do |file|
      expect(file.fcntl(Fcntl::F_GETFD, 0) & Fcntl::FD_CLOEXEC).to eq(Fcntl::FD_CLOEXEC)
    end
  end

  it "the lockfile is empty" do
    expect(IO.read(lockfile)).to eq("")
  end

  it "and a second client tries to acquire the lock, it doesn't get the lock until *after* the first client exits" do
    # Start p2 and tell it to move forward in the background
    p2.run_to("acquired lock") do
      # While p2 is trying to acquire, wait a bit and then let p1 complete
      sleep(BREATHING_ROOM)
      expect(p2.last_event).to match(/after (started|created lock)/)
      p1.run_to_completion
    end
    p2.run_to_completion
  end

  it "and a second client tries to get the lock and the first is killed, the second client gets the lock immediately" do
    p2.run_to("acquired lock") do
      sleep BREATHING_ROOM
      expect(p2.last_event).to match(/after (started|created lock)/)
      p1.stop
    end
    p2.run_to_completion
  end
end
context "when a client acquires the lock and saves the pid" do
  # p1 is paused right after it reports the "saved pid" event.
  before { p1.run_to("saved pid") }

  it "the lockfile is created" do
    expect(File.exist?(lockfile)).to be_truthy
  end

  it "the lockfile is locked" do
    run_lock = Chef::RunLock.new(lockfile)
    begin
      expect(run_lock.test).to be_falsey
    ensure
      run_lock.release
    end
  end

  it "sets FD_CLOEXEC on the lockfile", supports_cloexec: true do
    # Block form of File.open: the descriptor is closed when the block exits,
    # even if the expectation fails, instead of leaking for the rest of the run.
    File.open(lockfile) do |file|
      expect(file.fcntl(Fcntl::F_GETFD, 0) & Fcntl::FD_CLOEXEC).to eq(Fcntl::FD_CLOEXEC)
    end
  end

  it "the PID is in the lockfile" do
    expect(IO.read(lockfile)).to eq p1.pid.to_s
  end

  it "and a second client tries to acquire the lock, it doesn't get the lock until *after* the first client exits" do
    # Start p2 and tell it to move forward in the background
    p2.run_to("acquired lock") do
      # While p2 is trying to acquire, wait a bit and then let p1 complete
      sleep(BREATHING_ROOM)
      expect(p2.last_event).to match(/after (started|created lock)/)
      p1.run_to_completion
    end
    p2.run_to_completion
  end

  it "when a second client tries to get the lock and the first is killed, the second client gets the lock immediately" do
    p2.run_to("acquired lock") do
      sleep BREATHING_ROOM
      expect(p2.last_event).to match(/after (started|created lock)/)
      p1.stop
    end
    p2.run_to_completion
  end
end
context "when a client acquires a lock and exits normally" do
  # p1 has run through every step and its process has been waited on.
  before do
    p1.run_to_completion
  end

  it "the lockfile remains" do
    expect(File.exist?(lockfile)).to be_truthy
  end

  it "the lockfile is not locked" do
    probe = Chef::RunLock.new(lockfile)
    begin
      expect(probe.test).to be_truthy
    ensure
      probe.release
    end
  end

  it "the PID is in the lockfile" do
    expect(IO.read(lockfile)).to eq p1.pid.to_s
  end

  it "and a second client tries to acquire the lock, it gets the lock immediately" do
    p2.run_to_completion
  end
end
end
# Two forked children call RunLock#test in turn: the first is expected to get
# true and then stays alive holding the lock, the second is expected to get
# false while the first is still running.
it "test returns true and acquires the lock" do
  run_lock = Chef::RunLock.new(lockfile)
  from_tests, to_fork = IO.pipe
  from_fork, to_tests = IO.pipe

  begin
    # Named *_pid so the locals no longer shadow the p1/p2 ClientProcess helpers.
    holder_pid = fork do
      expect(run_lock.test).to eq(true)
      to_tests.puts "lock acquired"
      # Wait for the test to tell us we can exit before exiting
      from_tests.readline
      exit! 0
    end

    wait_on_lock(from_fork)

    contender_pid = fork do
      expect(run_lock.test).to eq(false)
      exit! 0
    end

    _pid, exit_status = Process.waitpid2(contender_pid)
    expect(exit_status).to eq(0)

    to_fork.puts "you can exit now"
    _pid, exit_status = Process.waitpid2(holder_pid)
    expect(exit_status).to eq(0)
  ensure
    # Release all four pipe ends so they are not leaked into later examples.
    [from_tests, to_fork, from_fork, to_tests].each { |io| io.close unless io.closed? }
  end
end
# A forked child acquires the lock and stays alive; the parent then expects
# RunLock#test to return false while the child is still running.
it "test returns without waiting when the lock is acquired" do
  run_lock = Chef::RunLock.new(lockfile)
  from_tests, to_fork = IO.pipe
  from_fork, to_tests = IO.pipe

  begin
    # Named holder_pid so the local no longer shadows the p1 ClientProcess helper.
    holder_pid = fork do
      run_lock.acquire
      to_tests.puts "lock acquired"
      # Wait for the test to tell us we can exit before exiting
      from_tests.readline
      exit! 0
    end

    wait_on_lock(from_fork)
    expect(run_lock.test).to eq(false)

    to_fork.puts "you can exit now"
    _pid, exit_status = Process.waitpid2(holder_pid)
    expect(exit_status).to eq(0)
  ensure
    # Release all four pipe ends so they are not leaked into later examples.
    [from_tests, to_fork, from_fork, to_tests].each { |io| io.close unless io.closed? }
  end
end
end
#
# Runs a process in the background that will:
#
# 1. start up (`started` event)
# 2. create the runlock file (`created lock` event, fired by TestRunLock#create_lock)
# 3. acquire the runlock file (`acquired lock` event)
# 4. save the pid to the lockfile (`saved pid` event)
# 5. exit
#
# You control exactly how far the client process goes with the `run_to`
# method: it will stop at any given spot so you can test for race conditions.
#
# It uses a pair of pipes to communicate with the process. The tests will
# send an event name over to the process, which gives the process permission
# to run until it reaches that event (at which point it waits for another event
# name). The process sends the name of each event it reaches back to the tests.
#
class ClientProcess
  # Number of 0.1 second pauses readline_nonblock allows while waiting for
  # the rest of a partially received line before handing back what it has.
  PARTIAL_LINE_RETRIES = 50

  # example - the running example; it is sent log_event and lockfile
  # name    - label used to prefix this process's entries in the event log
  def initialize(example, name)
    @example = example
    @name = name
    # True once the forked child has been waited on, so it is never signalled again.
    @reaped = false
    @read_from_process, @write_to_tests = IO.pipe
    @read_from_tests, @write_to_process = IO.pipe
  end

  attr_reader :example
  attr_reader :name
  attr_reader :pid

  # Consumes every event line currently waiting on the pipe from the child
  # (without blocking) and returns the most recent event text seen so far.
  # Once the pipes have been closed by #stop this just returns the cached value.
  def last_event
    until read_from_process.closed?
      line = readline_nonblock(read_from_process)
      break if line.nil?

      record_event(line)
    end
    @last_event
  end

  # Lets the child run until it has fired +to_event+, starting it first if
  # needed. The optional block runs in the test process after the child has
  # been given its instruction and before this method waits for the event.
  #
  # Raises Timeout::Error if the event is not seen within
  # CLIENT_PROCESS_TIMEOUT seconds, and RuntimeError if the event pipe hits
  # end-of-file first.
  def run_to(to_event, &background_block)
    example.log_event("#{name}.run_to(#{to_event.inspect})")

    # Start the process if it's not started
    start unless pid

    # Tell the process what to stop at (also means it can go)
    write_to_process.print "#{to_event}\n"

    # Run the background block
    yield if background_block

    # Wait until it gets there
    Timeout.timeout(CLIENT_PROCESS_TIMEOUT) do
      until @last_event == "after #{to_event}"
        line = read_from_process.gets
        raise "#{name} event pipe closed before reaching #{to_event.inspect}" if line.nil?

        record_event(line)
      end
    end

    example.log_event("#{name}.run_to(#{to_event.inspect}) finished")
  end

  # Tells the child to run without stopping again and waits for it to exit.
  def run_to_completion
    example.log_event("#{name}.run_to_completion")

    # Start the process if it's not started
    start unless pid

    # Tell the process to stop at nothing (no blocking)
    write_to_process.print "nothing\n"

    # Wait for the process to exit
    wait_for_exit

    example.log_event("#{name}.run_to_completion finished")
  end

  # Waits (up to CLIENT_PROCESS_TIMEOUT seconds) for the child to exit.
  # Does nothing beyond logging when there is no child or it was already reaped.
  def wait_for_exit
    example.log_event("#{name}.wait_for_exit (pid #{pid})")
    Timeout.timeout(CLIENT_PROCESS_TIMEOUT) do
      if pid && !@reaped
        Process.wait(pid)
        @reaped = true
      end
    end
    example.log_event("#{name}.wait_for_exit finished (pid #{pid})")
  end

  # Kills the child (if one is still around), collects any events it had
  # already written, and closes both pipes. Safe to call more than once.
  def stop
    if pid
      example.log_event("#{name}.stop (pid #{pid})")
      if @reaped
        # Already waited on: signalling the pid again could hit an unrelated
        # process that has since been given the same pid.
        example.log_event("#{name}.stop finished (pid #{pid} wasn't running)")
      else
        begin
          # Send the kill signal once, then poll until the child has been reaped
          Timeout.timeout(CLIENT_PROCESS_TIMEOUT) do
            Process.kill(:KILL, pid)
            sleep(0.05) until Process.waitpid2(pid, Process::WNOHANG)
          end
          @reaped = true
          example.log_event("#{name}.stop finished (stopped pid #{pid})")
        # Process not found is perfectly fine when we're trying to kill a process :)
        rescue Errno::ESRCH
          @reaped = true
          example.log_event("#{name}.stop finished (pid #{pid} wasn't running)")
        end
      end
    end
  ensure
    # Pull the remaining events into the log before the pipes go away, then
    # close the IO.pipes so we don't leak them as open filehandles
    last_event
    close_pipes
  end

  # Called inside the child process: reports +event+ to the tests and, when
  # this is the event the child was told to stop at (or it has no
  # instructions yet), blocks until the tests send the next stop point.
  def fire_event(event)
    # Let the caller know what event we've reached
    write_to_tests.print("after #{event}@#{timestamp}\n")

    # Block until the client tells us where to stop
    if !@run_to_event || event == @run_to_event
      write_to_tests.print("waiting for instructions after #{event}@#{timestamp}\n")
      @run_to_event = read_from_tests.gets.strip
      write_to_tests.print("told to run to #{@run_to_event} after #{event}@#{timestamp}\n")
    else
      write_to_tests.print("continuing until #{@run_to_event} after #{event}@#{timestamp}\n")
    end
  end

  private

  attr_reader :read_from_process
  attr_reader :write_to_tests
  attr_reader :read_from_tests
  attr_reader :write_to_process

  # RunLock that reports a "created lock" event right after create_lock runs.
  class TestRunLock < Chef::RunLock
    attr_accessor :client_process

    def create_lock
      super
      client_process.fire_event("created lock")
    end
  end

  # Forks the child that walks through the lock steps, firing an event at each one.
  def start
    example.log_event("#{name}.start")
    @pid = fork do
      begin
        Timeout.timeout(CLIENT_PROCESS_TIMEOUT) do
          run_lock = TestRunLock.new(example.lockfile)
          run_lock.client_process = self
          fire_event("started")
          run_lock.acquire
          fire_event("acquired lock")
          run_lock.save_pid
          fire_event("saved pid")
          exit!(0)
        end
      rescue => e
        # Events travel one per line, so fold a multi-line message onto one line.
        fire_event(e.message.lines.map(&:chomp).join(" // "))
        raise
      end
    end
    example.log_event("#{name}.start forked (pid #{pid})")
  end

  # Reads one newline-terminated line from +fd+ without blocking.
  #
  # Returns nil when nothing is waiting, or when +fd+ is closed or at
  # end-of-file with nothing read. If a line has only partly arrived, pauses
  # 0.1s (at most PARTIAL_LINE_RETRIES times) for the rest, keeping what was
  # already read, and finally returns the partial text.
  def readline_nonblock(fd)
    buffer = ""
    retries_left = PARTIAL_LINE_RETRIES
    until buffer.end_with?("\n")
      begin
        buffer << fd.read_nonblock(1)
      rescue IO::WaitReadable
        return nil if buffer.empty?
        return buffer if retries_left.zero?

        retries_left -= 1
        sleep 0.1
      rescue IOError
        # EOFError (a kind of IOError) or a closed stream: nothing more will arrive.
        return buffer.empty? ? nil : buffer
      end
    end
    buffer
  end

  # Parses one "<event>@<time>" line from the child, copies it into the
  # example's event log and remembers the event text as the latest event.
  def record_event(line)
    # Split on the last "@" so event text that itself contains "@" stays intact.
    event, separator, time = line.chomp.rpartition("@")
    if separator.empty?
      # No timestamp on the line: rpartition leaves all of the text in the last slot.
      event = time
      time = nil
    end

    example.log_event("#{name}.last_event got #{event}")
    if time
      example.log_event("[#{name}] #{event}", time.strip)
    else
      example.log_event("[#{name}] #{event}")
    end
    @last_event = event
  end

  # Closes whichever of the four pipe ends are still open.
  def close_pipes
    [read_from_process, write_to_tests, read_from_tests, write_to_process].each do |io|
      io.close unless io.closed?
    end
  end

  # Current wall-clock time formatted as HH:MM:SS.mmm for event lines.
  def timestamp
    Time.now.strftime("%H:%M:%S.%L")
  end
end
end
|