Fix for deadlocks when writing while migrating
This commit is contained in:
16
src/serve.c
16
src/serve.c
@@ -579,7 +579,7 @@ void server_close_clients( struct server *params )
|
||||
|
||||
info("closing all clients");
|
||||
|
||||
int i, j;
|
||||
int i; /* , j; */
|
||||
struct client_tbl_entry *entry;
|
||||
|
||||
for( i = 0; i < params->max_nbd_clients; i++ ) {
|
||||
@@ -590,9 +590,17 @@ void server_close_clients( struct server *params )
|
||||
client_signal_stop( entry->client );
|
||||
}
|
||||
}
|
||||
for( j = 0; j < params->max_nbd_clients; j++ ) {
|
||||
join_client_thread( ¶ms->nbd_client[j] );
|
||||
}
|
||||
/* We don't join the clients here. When we enter the final
|
||||
* mirror pass, we get the IO lock, then wait for the server_fd
|
||||
* to close before sending the data, to be sure that no new
|
||||
* clients can be accepted which might think they've written
|
||||
* to the disc. However, an existing client thread can be
|
||||
* waiting for the IO lock already, so if we try to join it
|
||||
* here, we deadlock.
|
||||
*
|
||||
* The client threads will be joined in serve_cleanup.
|
||||
*
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1,60 +1,102 @@
|
||||
#!/usr/bin/env ruby
|
||||
require 'rubygems'
|
||||
require 'linux/nbd_client'
|
||||
require 'flexnbd/fake_source'
|
||||
require 'socket'
|
||||
require 'fileutils'
|
||||
|
||||
|
||||
FLEXNBD = ARGV.shift
|
||||
raise "No binary!" unless File.executable?( FLEXNBD )
|
||||
raise "No trickle!" unless system "which trickle > /dev/null"
|
||||
|
||||
Thread.abort_on_exception = true
|
||||
|
||||
SIZE = 1024*1024*1024*10 # 10G
|
||||
|
||||
#BREAKIT = false
|
||||
BREAKIT = true
|
||||
|
||||
MB = 1024*1024
|
||||
GB = 1024*MB
|
||||
SIZE = 20*MB
|
||||
WRITE_DATA = "foo!" * 2048 # 8K write
|
||||
SOURCE_PORT = 9990
|
||||
DEST_PORT = 9991
|
||||
SOURCE_SOCK = "src.sock"
|
||||
DEST_SOCK = "dst.sock"
|
||||
SOURCE_FILE = "src.file"
|
||||
DEST_FILE = "dst.file"
|
||||
|
||||
puts "Cleaning up from old runs..."
|
||||
FileUtils.rm_f "src*"
|
||||
FileUtils.rm_f "dst*"
|
||||
|
||||
puts "Creating source & destination files..."
|
||||
FileUtils.touch("src.file")
|
||||
File.truncate("src.file", SIZE)
|
||||
FileUtils.touch("dst.file")
|
||||
File.truncate("dst.file", SIZE)
|
||||
FileUtils.touch(SOURCE_FILE)
|
||||
File.truncate(SOURCE_FILE, SIZE)
|
||||
FileUtils.touch(DEST_FILE)
|
||||
File.truncate(DEST_FILE, SIZE)
|
||||
|
||||
puts "Making filesystem on source..."
|
||||
result = Kernel.system "/sbin/mkfs.ext3 -F src.file" # some data to start with
|
||||
puts "Result of making filesystem: #{result.inspect}"
|
||||
puts "Making source data"
|
||||
File.open(SOURCE_FILE, "wb"){|f| f.write "a"*SIZE }
|
||||
#Kernel.system "dd if=/dev/urandom of=#{SOURCE_FILE} bs=#{SIZE} count=1"
|
||||
|
||||
puts "Starting destination process..."
|
||||
dst_proc = fork() {
|
||||
exec("flexnbd listen -l 127.0.0.1 -p 9991 -f dst.file -s dst.sock -v >dst.stdout 2>dst.stderr")
|
||||
cmd = "#{FLEXNBD} listen -l 127.0.0.1 -p #{DEST_PORT} -f #{DEST_FILE} -s #{DEST_SOCK} -v"
|
||||
exec cmd
|
||||
}
|
||||
|
||||
puts "Starting source process..."
|
||||
src_proc = fork() {
|
||||
exec("flexnbd serve -l 127.0.0.1 -p 9990 -f src.file -s src.sock -v >src.stdout 2>src.stderr")
|
||||
cmd = "#{FLEXNBD} serve -l 127.0.0.1 -p #{SOURCE_PORT} -f #{SOURCE_FILE} -s #{SOURCE_SOCK} -v"
|
||||
#exec "trickle -t 1 -u 5120 -s #{cmd}"
|
||||
exec cmd
|
||||
}
|
||||
|
||||
puts "dst_proc is #{dst_proc}"
|
||||
puts "src_proc is #{src_proc}"
|
||||
|
||||
|
||||
at_exit {
|
||||
[dst_proc, src_proc].each do |pid|
|
||||
Process.kill( "KILL", pid ) rescue nil
|
||||
end
|
||||
[SOURCE_FILE, DEST_FILE, SOURCE_SOCK, DEST_SOCK].each do |filename|
|
||||
FileUtils.rm_f filename
|
||||
end
|
||||
}
|
||||
|
||||
|
||||
|
||||
puts "Sleeping to let flexnbds run..."
|
||||
sleep 10
|
||||
Timeout.timeout(10) do
|
||||
sleep 0.1 while !File.exists?( SOURCE_SOCK )
|
||||
puts "Got source sock"
|
||||
sleep 0.1 while !File.exists?( DEST_SOCK )
|
||||
puts "Got dest sock"
|
||||
end
|
||||
|
||||
|
||||
if BREAKIT
|
||||
# Start writing to the source
|
||||
puts "Creating thread to write to the source..."
|
||||
|
||||
# Start writing to the source
|
||||
puts "Creating thread to write to the source..."
|
||||
src_writer = Thread.new do
|
||||
client = Linux::NbdClient.new("127.0.0.1", 9990, true)
|
||||
loop do
|
||||
begin
|
||||
client.write(0, WRITE_DATA)
|
||||
rescue => err
|
||||
puts "Writer encountered #{err.inspect} writing to source"
|
||||
break
|
||||
src_writer = Thread.new do
|
||||
client = FlexNBD::FakeSource.new( "127.0.0.1", SOURCE_PORT, "Timed out connecting" )
|
||||
loop do
|
||||
begin
|
||||
client.write(0, WRITE_DATA)
|
||||
rescue => err
|
||||
puts "Writer encountered #{err.inspect} writing to source"
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
puts "Starting mirroring process..."
|
||||
UNIXSocket.open("src.sock") {|sock|
|
||||
sock.write(["mirror", "127.0.0.1", "9991", "unlink"].join("\x0A") + "\x0A\x0A")
|
||||
UNIXSocket.open(SOURCE_SOCK) {|sock|
|
||||
sock.write(["mirror", "127.0.0.1", DEST_PORT.to_s, "unlink"].join("\x0A") + "\x0A\x0A")
|
||||
sock.flush
|
||||
rsp = sock.readline
|
||||
puts "Response: #{rsp}"
|
||||
@@ -62,15 +104,21 @@ UNIXSocket.open("src.sock") {|sock|
|
||||
|
||||
# tell serve to migrate to the listen one
|
||||
|
||||
puts "Waiting for destination to exit..."
|
||||
dst_result = Process::waitpid2(dst_proc)
|
||||
puts "destination exited: #{dst_result.inspect}"
|
||||
Timeout.timeout( 10 ) do
|
||||
start_time = Time.now
|
||||
puts "Waiting for destination #{dst_proc} to exit..."
|
||||
dst_result = Process::waitpid2(dst_proc)
|
||||
puts "destination exited: #{dst_result.inspect}"
|
||||
|
||||
puts "Waiting for source to exit..."
|
||||
src_result = Process::waitpid2(src_proc)
|
||||
puts "Source exited: #{src_result.inspect}"
|
||||
puts "Waiting for source to exit..."
|
||||
src_result = Process::waitpid2(src_proc)
|
||||
puts "Source exited after #{Time.now - start_time} seconds: #{src_result.inspect}"
|
||||
end
|
||||
|
||||
puts "Waiting for writer to die..."
|
||||
src_writer.join
|
||||
puts "Writer has died"
|
||||
|
||||
if BREAKIT
|
||||
puts "Waiting for writer to die..."
|
||||
src_writer.join
|
||||
puts "Writer has died"
|
||||
end
|
||||
|
||||
|
@@ -90,14 +90,19 @@ module FlexNBD
|
||||
|
||||
def send_mirror
|
||||
read_hello()
|
||||
write_write_request( 0, 8 )
|
||||
write_data( "12345678" )
|
||||
write( 0, "12345678" )
|
||||
read_response()
|
||||
write_disconnect_request()
|
||||
close()
|
||||
end
|
||||
|
||||
|
||||
def write( from, data )
|
||||
write_write_request( from, data.length )
|
||||
write_data( data )
|
||||
end
|
||||
|
||||
|
||||
def read_response
|
||||
magic = @sock.read(4)
|
||||
error_s = @sock.read(4)
|
||||
|
Reference in New Issue
Block a user