Simplify the migration handover protocol

The three-way hand-off has a problem: there's no way to arrange for the state of the migration to be unambiguous in case of failure. If the final "disconnect" message is lost (that is, the destination never receives it, whether or not the sender actually sent it), the destination has no option but to quit with an error status and let a human sort it out. However, at that point we can either arrange to have a .INCOMPLETE file still on disc or not, and whichever we choose, we can still end up with data loss: for either choice there is a specific calamity that could have befallen the sender which makes it the wrong one. Given this, it makes sense to fall back to a simpler protocol: just send all the data, then send a "disconnect" message. This has the same downside, in that we need a human to sort out specific failure cases, but combined with --unlink before sending "disconnect" (see next patch) it will always be possible for a human to disambiguate, whether the destination quit with an error status or not.
2012-07-23 10:22:25 +01:00
parent f6f4266fd6
commit fd935ce4c9
16 changed files with 54 additions and 212 deletions
--- a/tests/acceptance/fakes/dest/close_after_entrust.rb
+++ b/tests/acceptance/fakes/dest/close_after_entrust.rb
@@ -1,29 +0,0 @@
-#!/usr/bin/env ruby
-# encoding: utf-8
-
-# Open a server, accept a client, then we expect a single write
-# followed by an entrust. Disconnect after the entrust. We expect a
-# reconnection followed by a full mirror.
-
-require 'flexnbd/fake_dest'
-include FlexNBD
-
-addr, port, src_pid = *ARGV
-server = FakeDest.new( addr, port )
-client = server.accept
-
-client.write_hello
-write_req = client.read_request
-data = client.read_data( write_req[:len] )
-client.write_reply( write_req[:handle], 0 )
-
-entrust_req = client.read_request
-fail "Not an entrust" unless entrust_req[:type] == 65536
-client.close
-
-client2 = server.accept
-client2.receive_mirror
-
-
-exit(0)
-
--- a/tests/acceptance/fakes/dest/close_after_writes.rb
+++ b/tests/acceptance/fakes/dest/close_after_writes.rb
@@ -3,7 +3,8 @@

 # Open a server, accept a client, then we expect a single write
 # followed by an entrust.  However, we disconnect after the write so
-# the entrust will fail.  We expect a reconnection.
+# the entrust will fail.  We don't expect a reconnection: the sender
+# can't reliably spot a failed send.

 require 'flexnbd/fake_dest'
 include FlexNBD
@@ -21,7 +22,4 @@ client.write_reply( req[:handle], 0 )
 client.close
 Process.kill("CONT", src_pid.to_i)

-client2 = server.accept
-client2.close
-
 exit(0)
--- a/tests/acceptance/fakes/dest/error_on_entrust.rb
+++ b/tests/acceptance/fakes/dest/error_on_entrust.rb
@@ -1,34 +0,0 @@
-#!/usr/bin/env ruby
-# encoding: utf-8
-
-# Receive a mirror, but respond to the entrust with an error.  There's
-# currently no code path in flexnbd which can do this, but we could
-# add one.
-
-require 'flexnbd/fake_dest'
-include FlexNBD
-
-addr, port = *ARGV
-server = FakeDest.new( addr, port )
-client = server.accept
-
-client.write_hello
-loop do
-  req = client.read_request
-  if req[:type] == 1
-    client.read_data( req[:len] )
-    client.write_reply( req[:handle] )
-  else
-    client.write_reply( req[:handle], 1 )
-    break
-  end
-end
-
-client.close
-
-client2 = server.accept( "Timed out waiting for a reconnection" )
-
-client2.close
-server.close
-
-exit(0)
--- a/tests/acceptance/flexnbd/fake_source.rb
+++ b/tests/acceptance/flexnbd/fake_source.rb
@@ -93,8 +93,6 @@ module FlexNBD
      write_write_request( 0, 8 )
      write_data( "12345678" )
      read_response()
-      write_entrust_request()
-      read_response()
      write_disconnect_request()
      close()
    end
--- a/tests/acceptance/test_dest_error_handling.rb
+++ b/tests/acceptance/test_dest_error_handling.rb
@@ -58,10 +58,6 @@ class TestDestErrorHandling  < Test::Unit::TestCase
    run_fake( "source/close_after_write" )
  end

-  def test_disconnect_before_entrust_reply_causes_error
-    run_fake( "source/close_after_entrust" )
-  end
-

  def test_disconnect_before_write_reply_causes_error
    # Note that this is an odd case: writing the reply doesn't fail.
@@ -71,14 +67,6 @@ class TestDestErrorHandling  < Test::Unit::TestCase
  end


-  def test_disconnect_after_entrust_reply_causes_error
-    @env.nbd1.can_die(0)
-    # This fake runs a failed migration then a succeeding one, so we
-    # expect the destination to take control.
-    run_fake( "source/close_after_entrust_reply" )
-    assert_control
-  end
-

  def test_straight_migration
    @env.nbd1.can_die(0)
--- a/tests/acceptance/test_happy_path.rb
+++ b/tests/acceptance/test_happy_path.rb
@@ -72,13 +72,14 @@ class TestHappyPath < Test::Unit::TestCase
    @env.listen2

    @env.nbd1.can_die
+    @env.nbd2.can_die(0)
    stdout, stderr = @env.mirror12

    @env.nbd1.join
+    @env.nbd2.join

    assert_equal(@env.file1.read_original( 0, @env.blocksize ),
                 @env.file2.read( 0, @env.blocksize ) )
-    assert @env.status2['has_control'], "destination didn't take control"
  end


--- a/tests/acceptance/test_source_error_handling.rb
+++ b/tests/acceptance/test_source_error_handling.rb
@@ -79,17 +79,6 @@ class TestSourceErrorHandling < Test::Unit::TestCase
  end


-  def test_post_entrust_disconnect_causes_retry
-    @env.nbd1.can_die(0)
-    run_fake( "dest/close_after_entrust" )
-  end
-
-
-  def test_entrust_error_causes_retry
-    run_fake( "dest/error_on_entrust" )
-  end
-
-
  def test_cancel_migration
    run_fake( "dest/break_after_hello" )
  end