Simplify the migration handover protocol
The three-way hand-off has a problem: there's no way to arrange for the state of the migration to be unambiguous in case of failure. If the final "disconnect" message is lost (that is, the destination never receives it, whether or not the sender actually sent it), the destination has no option but to quit with an error status and let a human sort it out. At that point we can either arrange to have a .INCOMPLETE file still on disc or not - but it doesn't matter which we choose: in either case there is some specific calamity that could have befallen the sender which leaves us with data loss. Given this, it makes sense to fall back to a simpler protocol: just send all the data, then send a "disconnect" message. This has the same downside, namely that we need a human to sort out specific failure cases, but combined with --unlink before sending "disconnect" (see next patch) it will always be possible for a human to disambiguate, whether the destination quit with an error status or not.
This commit is contained in:
@@ -1,29 +0,0 @@
|
||||
#!/usr/bin/env ruby
|
||||
# encoding: utf-8
|
||||
|
||||
# Open a server, accept a client, then we expect a single write
|
||||
# followed by an entrust. Disconnect after the entrust. We expect a
|
||||
# reconnection followed by a full mirror.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
|
||||
addr, port, src_pid = *ARGV
|
||||
server = FakeDest.new( addr, port )
|
||||
client = server.accept
|
||||
|
||||
client.write_hello
|
||||
write_req = client.read_request
|
||||
data = client.read_data( write_req[:len] )
|
||||
client.write_reply( write_req[:handle], 0 )
|
||||
|
||||
entrust_req = client.read_request
|
||||
fail "Not an entrust" unless entrust_req[:type] == 65536
|
||||
client.close
|
||||
|
||||
client2 = server.accept
|
||||
client2.receive_mirror
|
||||
|
||||
|
||||
exit(0)
|
||||
|
@@ -3,7 +3,8 @@
|
||||
|
||||
# Open a server, accept a client, then we expect a single write
|
||||
# followed by an entrust. However, we disconnect after the write so
|
||||
# the entrust will fail. We expect a reconnection.
|
||||
# the entrust will fail. We don't expect a reconnection: the sender
|
||||
# can't reliably spot a failed send.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
@@ -21,7 +22,4 @@ client.write_reply( req[:handle], 0 )
|
||||
client.close
|
||||
Process.kill("CONT", src_pid.to_i)
|
||||
|
||||
client2 = server.accept
|
||||
client2.close
|
||||
|
||||
exit(0)
|
||||
|
@@ -1,34 +0,0 @@
|
||||
#!/usr/bin/env ruby
|
||||
# encoding: utf-8
|
||||
|
||||
# Receive a mirror, but respond to the entrust with an error. There's
|
||||
# currently no code path in flexnbd which can do this, but we could
|
||||
# add one.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
|
||||
addr, port = *ARGV
|
||||
server = FakeDest.new( addr, port )
|
||||
client = server.accept
|
||||
|
||||
client.write_hello
|
||||
loop do
|
||||
req = client.read_request
|
||||
if req[:type] == 1
|
||||
client.read_data( req[:len] )
|
||||
client.write_reply( req[:handle] )
|
||||
else
|
||||
client.write_reply( req[:handle], 1 )
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
client.close
|
||||
|
||||
client2 = server.accept( "Timed out waiting for a reconnection" )
|
||||
|
||||
client2.close
|
||||
server.close
|
||||
|
||||
exit(0)
|
@@ -93,8 +93,6 @@ module FlexNBD
|
||||
write_write_request( 0, 8 )
|
||||
write_data( "12345678" )
|
||||
read_response()
|
||||
write_entrust_request()
|
||||
read_response()
|
||||
write_disconnect_request()
|
||||
close()
|
||||
end
|
||||
|
@@ -58,10 +58,6 @@ class TestDestErrorHandling < Test::Unit::TestCase
|
||||
run_fake( "source/close_after_write" )
|
||||
end
|
||||
|
||||
def test_disconnect_before_entrust_reply_causes_error
|
||||
run_fake( "source/close_after_entrust" )
|
||||
end
|
||||
|
||||
|
||||
def test_disconnect_before_write_reply_causes_error
|
||||
# Note that this is an odd case: writing the reply doesn't fail.
|
||||
@@ -71,14 +67,6 @@ class TestDestErrorHandling < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
|
||||
def test_disconnect_after_entrust_reply_causes_error
|
||||
@env.nbd1.can_die(0)
|
||||
# This fake runs a failed migration then a succeeding one, so we
|
||||
# expect the destination to take control.
|
||||
run_fake( "source/close_after_entrust_reply" )
|
||||
assert_control
|
||||
end
|
||||
|
||||
|
||||
def test_straight_migration
|
||||
@env.nbd1.can_die(0)
|
||||
|
@@ -72,13 +72,14 @@ class TestHappyPath < Test::Unit::TestCase
|
||||
@env.listen2
|
||||
|
||||
@env.nbd1.can_die
|
||||
@env.nbd2.can_die(0)
|
||||
stdout, stderr = @env.mirror12
|
||||
|
||||
@env.nbd1.join
|
||||
@env.nbd2.join
|
||||
|
||||
assert_equal(@env.file1.read_original( 0, @env.blocksize ),
|
||||
@env.file2.read( 0, @env.blocksize ) )
|
||||
assert @env.status2['has_control'], "destination didn't take control"
|
||||
end
|
||||
|
||||
|
||||
|
@@ -79,17 +79,6 @@ class TestSourceErrorHandling < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
|
||||
def test_post_entrust_disconnect_causes_retry
|
||||
@env.nbd1.can_die(0)
|
||||
run_fake( "dest/close_after_entrust" )
|
||||
end
|
||||
|
||||
|
||||
def test_entrust_error_causes_retry
|
||||
run_fake( "dest/error_on_entrust" )
|
||||
end
|
||||
|
||||
|
||||
def test_cancel_migration
|
||||
run_fake( "dest/break_after_hello" )
|
||||
end
|
||||
|
Reference in New Issue
Block a user