From 8b43321ef2c3fdd55f34ebab643976fc04e3251c Mon Sep 17 00:00:00 2001 From: Alex Young Date: Thu, 13 Sep 2012 12:21:43 +0100 Subject: [PATCH] Fix for deadlocks when writing while migrating --- src/serve.c | 16 ++- .../acceptance/active_migration_test/check.rb | 114 +++++++++++++----- tests/acceptance/flexnbd/fake_source.rb | 9 +- 3 files changed, 100 insertions(+), 39 deletions(-) diff --git a/src/serve.c b/src/serve.c index 4011350..777a3f6 100644 --- a/src/serve.c +++ b/src/serve.c @@ -579,7 +579,7 @@ void server_close_clients( struct server *params ) info("closing all clients"); - int i, j; + int i; /* , j; */ struct client_tbl_entry *entry; for( i = 0; i < params->max_nbd_clients; i++ ) { @@ -590,9 +590,17 @@ void server_close_clients( struct server *params ) client_signal_stop( entry->client ); } } - for( j = 0; j < params->max_nbd_clients; j++ ) { - join_client_thread( ¶ms->nbd_client[j] ); - } + /* We don't join the clients here. When we enter the final + * mirror pass, we get the IO lock, then wait for the server_fd + * to close before sending the data, to be sure that no new + * clients can be accepted which might think they've written + * to the disc. However, an existing client thread can be + * waiting for the IO lock already, so if we try to join it + * here, we deadlock. + * + * The client threads will be joined in serve_cleanup. + * + */ } diff --git a/tests/acceptance/active_migration_test/check.rb b/tests/acceptance/active_migration_test/check.rb index 407ce0b..ca61445 100755 --- a/tests/acceptance/active_migration_test/check.rb +++ b/tests/acceptance/active_migration_test/check.rb @@ -1,60 +1,102 @@ #!/usr/bin/env ruby require 'rubygems' -require 'linux/nbd_client' +require 'flexnbd/fake_source' require 'socket' require 'fileutils' + +FLEXNBD = ARGV.shift +raise "No binary!" unless File.executable?( FLEXNBD ) +raise "No trickle!" unless system "which trickle > /dev/null" + Thread.abort_on_exception = true -SIZE = 1024*1024*1024*10 # 10G + +#BREAKIT = false +BREAKIT = true + +MB = 1024*1024 +GB = 1024*MB +SIZE = 20*MB WRITE_DATA = "foo!" * 2048 # 8K write +SOURCE_PORT = 9990 +DEST_PORT = 9991 +SOURCE_SOCK = "src.sock" +DEST_SOCK = "dst.sock" +SOURCE_FILE = "src.file" +DEST_FILE = "dst.file" puts "Cleaning up from old runs..." FileUtils.rm_f "src*" FileUtils.rm_f "dst*" puts "Creating source & destination files..." -FileUtils.touch("src.file") -File.truncate("src.file", SIZE) -FileUtils.touch("dst.file") -File.truncate("dst.file", SIZE) +FileUtils.touch(SOURCE_FILE) +File.truncate(SOURCE_FILE, SIZE) +FileUtils.touch(DEST_FILE) +File.truncate(DEST_FILE, SIZE) -puts "Making filesystem on source..." -result = Kernel.system "/sbin/mkfs.ext3 -F src.file" # some data to start with -puts "Result of making filesystem: #{result.inspect}" +puts "Making source data" +File.open(SOURCE_FILE, "wb"){|f| f.write "a"*SIZE } +#Kernel.system "dd if=/dev/urandom of=#{SOURCE_FILE} bs=#{SIZE} count=1" puts "Starting destination process..." dst_proc = fork() { - exec("flexnbd listen -l 127.0.0.1 -p 9991 -f dst.file -s dst.sock -v >dst.stdout 2>dst.stderr") + cmd = "#{FLEXNBD} listen -l 127.0.0.1 -p #{DEST_PORT} -f #{DEST_FILE} -s #{DEST_SOCK} -v" + exec cmd } puts "Starting source process..." src_proc = fork() { - exec("flexnbd serve -l 127.0.0.1 -p 9990 -f src.file -s src.sock -v >src.stdout 2>src.stderr") + cmd = "#{FLEXNBD} serve -l 127.0.0.1 -p #{SOURCE_PORT} -f #{SOURCE_FILE} -s #{SOURCE_SOCK} -v" + #exec "trickle -t 1 -u 5120 -s #{cmd}" + exec cmd } +puts "dst_proc is #{dst_proc}" +puts "src_proc is #{src_proc}" + + +at_exit { + [dst_proc, src_proc].each do |pid| + Process.kill( "KILL", pid ) rescue nil + end + [SOURCE_FILE, DEST_FILE, SOURCE_SOCK, DEST_SOCK].each do |filename| + FileUtils.rm_f filename + end +} + + + puts "Sleeping to let flexnbds run..." -sleep 10 +Timeout.timeout(10) do + sleep 0.1 while !File.exists?( SOURCE_SOCK ) + puts "Got source sock" + sleep 0.1 while !File.exists?( DEST_SOCK ) + puts "Got dest sock" +end +if BREAKIT + # Start writing to the source + puts "Creating thread to write to the source..." -# Start writing to the source -puts "Creating thread to write to the source..." -src_writer = Thread.new do - client = Linux::NbdClient.new("127.0.0.1", 9990, true) - loop do - begin - client.write(0, WRITE_DATA) - rescue => err - puts "Writer encountered #{err.inspect} writing to source" - break + src_writer = Thread.new do + client = FlexNBD::FakeSource.new( "127.0.0.1", SOURCE_PORT, "Timed out connecting" ) + loop do + begin + client.write(0, WRITE_DATA) + rescue => err + puts "Writer encountered #{err.inspect} writing to source" + break + end end end end puts "Starting mirroring process..." -UNIXSocket.open("src.sock") {|sock| - sock.write(["mirror", "127.0.0.1", "9991", "unlink"].join("\x0A") + "\x0A\x0A") +UNIXSocket.open(SOURCE_SOCK) {|sock| + sock.write(["mirror", "127.0.0.1", DEST_PORT.to_s, "unlink"].join("\x0A") + "\x0A\x0A") sock.flush rsp = sock.readline puts "Response: #{rsp}" @@ -62,15 +104,21 @@ UNIXSocket.open("src.sock") {|sock| # tell serve to migrate to the listen one -puts "Waiting for destination to exit..." -dst_result = Process::waitpid2(dst_proc) -puts "destination exited: #{dst_result.inspect}" +Timeout.timeout( 10 ) do + start_time = Time.now + puts "Waiting for destination #{dst_proc} to exit..." + dst_result = Process::waitpid2(dst_proc) + puts "destination exited: #{dst_result.inspect}" -puts "Waiting for source to exit..." -src_result = Process::waitpid2(src_proc) -puts "Source exited: #{src_result.inspect}" + puts "Waiting for source to exit..." + src_result = Process::waitpid2(src_proc) + puts "Source exited after #{Time.now - start_time} seconds: #{src_result.inspect}" +end -puts "Waiting for writer to die..." -src_writer.join -puts "Writer has died" + +if BREAKIT + puts "Waiting for writer to die..." + src_writer.join + puts "Writer has died" +end diff --git a/tests/acceptance/flexnbd/fake_source.rb b/tests/acceptance/flexnbd/fake_source.rb index 18a8907..1f14719 100644 --- a/tests/acceptance/flexnbd/fake_source.rb +++ b/tests/acceptance/flexnbd/fake_source.rb @@ -90,14 +90,19 @@ module FlexNBD def send_mirror read_hello() - write_write_request( 0, 8 ) - write_data( "12345678" ) + write( 0, "12345678" ) read_response() write_disconnect_request() close() end + def write( from, data ) + write_write_request( from, data.length ) + write_data( data ) + end + + def read_response magic = @sock.read(4) error_s = @sock.read(4)