Handle a failed disconnect correctly

If the sender disconnects its socket before sending the disconnect
message, the destination should restart the migration process.  This
patch makes sure that happens.
This commit is contained in:
Alex Young
2012-07-12 09:39:39 +01:00
parent f3cebcdcd5
commit eb90308b6e
9 changed files with 154 additions and 14 deletions

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env ruby
# encoding: utf-8

# Fake destination for a mirror: acknowledge the incoming requests,
# reply to the final (entrust) request, then drop the connection before
# the source is able to send its disconnect message.
#
# The source process is suspended (SIGSTOP) around the final reply and
# the close, and resumed (SIGCONT) straight afterwards.
#
# This test is currently unused: the sender can't detect that the
# write failed.

require 'flexnbd/fake_dest'
include FlexNBD

addr, port, src_pid = *ARGV

listener = FakeDest.new( addr, port )
mirror = listener.accept
mirror.write_hello

# Requests of type 1 carry a payload: consume it and acknowledge it.
# The first request of any other type ends the loop without a reply.
request = mirror.read_request
while request[:type] == 1
  mirror.read_data( request[:len] )
  mirror.write_reply( request[:handle] )
  request = mirror.read_request
end

# Answer the outstanding request and hang up while the source is frozen.
system "kill -STOP #{src_pid}"
mirror.write_reply( request[:handle] )
mirror.close
system "kill -CONT #{src_pid}"

# Give the resumed source a moment, then expect it to connect again.
sleep( 0.25 )
retry_conn = listener.accept( "Timed out waiting for a reconnection" )
retry_conn.close
listener.close

$stderr.puts "done"
exit(0)

View File

@@ -0,0 +1,34 @@
#!/usr/bin/env ruby
# encoding: utf-8

# Fake destination which accepts a mirror but answers the entrust
# request with an error.  There's currently no code path in flexnbd
# which can do this, but we could add one.

require 'flexnbd/fake_dest'
include FlexNBD

addr, port = *ARGV

listener = FakeDest.new( addr, port )
mirror = listener.accept
mirror.write_hello

# Requests of type 1 carry a payload: consume it and acknowledge it.
request = mirror.read_request
while request[:type] == 1
  mirror.read_data( request[:len] )
  mirror.write_reply( request[:handle] )
  request = mirror.read_request
end

# The first request of any other type gets an error reply (code 1),
# after which this connection is dropped.
mirror.write_reply( request[:handle], 1 )
mirror.close

# The source is expected to come back for another attempt.
retry_conn = listener.accept( "Timed out waiting for a reconnection" )
retry_conn.close
listener.close

exit(0)

View File

@@ -0,0 +1,32 @@
#!/usr/bin/env ruby

# Connect, send a migration, entrust, wait for the entrust reply and
# then disconnect *without* sending the disconnect request.  This
# simulates a source which fails after handing over control but before
# it can tell the destination to disconnect.
#
# We attempt to reconnect immediately afterwards to prove that we can
# retry the mirroring.

require 'flexnbd/fake_source'
include FlexNBD

# Only the address and port are used by this fake.
addr, port = *ARGV

client = FakeSource.new( addr, port, "Timed out connecting" )
client.read_hello
client.write_write_request( 0, 8 )
client.write_data( "12345678" )
# The write gets its own reply (see FakeSource#send_mirror).  Consume it
# here so that the read_response below really is the entrust reply;
# otherwise the socket could be closed before the entrust was answered.
client.read_response
client.write_entrust_request
client.read_response
client.close

# Second attempt: a complete, well-behaved mirror.
sleep(0.25)
client2 = FakeSource.new( addr, port, "Timed out reconnecting to mirror" )
client2.send_mirror

# Finally check that the destination still accepts a connection.
sleep(0.25)
client3 = FakeSource.new( addr, port, "Timed out reconnecting to read" )
client3.close

exit(0)

View File

@@ -94,7 +94,7 @@ module FlexNBD
end
def receive_mirror
def receive_mirror( opts = {} )
write_hello()
loop do
req = read_request
@@ -103,7 +103,7 @@ module FlexNBD
read_data( req[:len] )
write_reply( req[:handle] )
when 65536
write_reply( req[:handle] )
write_reply( req[:handle], opts[:err] == :entrust ? 1 : 0 )
break
else
raise "Unexpected request: #{req.inspect}"

View File

@@ -50,12 +50,27 @@ module FlexNBD
send_request( 65536, handle )
end
# Send a request of type 2 carrying +handle+ via send_request.  Going by
# the method name, type 2 is the disconnect request.  +handle+ defaults
# to "myhandle".
def write_disconnect_request( handle="myhandle" )
send_request( 2, handle )
end
# Write +data+ to @sock unchanged and return whatever @sock.write
# returns.
def write_data( data )
@sock.write( data )
end
# Run one complete, well-behaved mirror exchange on this connection:
# read the hello, write eight bytes at offset 0 and read the reply,
# send the entrust request and read its reply, then send the disconnect
# request and close.  Returns whatever close returns.
def send_mirror
  payload = "12345678"

  read_hello

  # One write request, immediately followed by its data.
  write_write_request( 0, payload.length )
  write_data( payload )
  read_response

  write_entrust_request
  read_response

  write_disconnect_request
  close
end
def read_response
magic = @sock.read(4)

View File

@@ -66,6 +66,14 @@ class TestDestErrorHandling < Test::Unit::TestCase
run_fake( "source/close_after_write_data" )
end
# Run the "source/close_after_entrust_reply" fake against the
# destination.  No assertion is made in this method itself; the outcome
# depends on run_fake and the test environment.
def test_disconnect_after_entrust_reply_causes_error
# NOTE(review): presumably declares that nbd1 exiting with status 0 is
# acceptable for this test - confirm against the environment helper.
@env.nbd1.can_die(0)
# This fake runs a failed migration then a succeeding one, so we
# expect the destination to take control.
run_fake( "source/close_after_entrust_reply" )
end
private
def run_fake( name )
@env.run_fake( name, @env.ip, @env.port1 )

View File

@@ -85,6 +85,12 @@ class TestSourceErrorHandling < Test::Unit::TestCase
end
# Run the "dest/error_on_entrust" fake against the source.  Going by the
# test name, the source is expected to retry after the entrust error;
# no assertion is made in this method itself.
def test_entrust_error_causes_retry
run_fake( "dest/error_on_entrust" )
end
private
def run_fake(name, opts = {})
@env.run_fake( name, @env.ip, @env.port2 )