Simplify the migration handover protocol
The three-way hand-off has a problem: there's no way to arrange for the state of the migration to be unambiguous in case of failure. If the final "disconnect" message is lost (as in, the destination never receives it whether it is sent by the sender or not), the destination has no option but to quit with an error status and let a human sort it out. However, at that point we can either arrange to have a .INCOMPLETE file still on disc or not - and it doesn't matter which we choose, we can still end up with dataloss by picking a specific calamity to have befallen the sender. Given this, it makes sense to fall back to a simpler protocol: just send all the data, then send a "disconnect" message. This has the same downside that we need a human to sort out specific failure cases, but combined with --unlink before sending "disconnect" (see next patch) it will always be possible for a human to disambiguate, whether the destination quit with an error status or not.
This commit is contained in:
41
src/client.c
41
src/client.c
@@ -32,8 +32,6 @@ struct client *client_create( struct server *serve, int socket )
|
||||
|
||||
c->stop_signal = self_pipe_create();
|
||||
|
||||
c->entrusted = 0;
|
||||
|
||||
debug( "Alloced client %p (%d, %d)", c, c->stop_signal->read_fd, c->stop_signal->write_fd );
|
||||
return c;
|
||||
}
|
||||
@@ -341,8 +339,6 @@ void client_flush( struct client * client, size_t len )
|
||||
* Returns 1 if we do, 0 otherwise.
|
||||
* request_err is set to 0 if the client sent a bad request, in which
|
||||
* case we drop the connection.
|
||||
* FIXME: after an ENTRUST, there's no way to distinguish between a
|
||||
* DISCONNECT and any bad request.
|
||||
*/
|
||||
int client_request_needs_reply( struct client * client,
|
||||
struct nbd_request request )
|
||||
@@ -356,14 +352,8 @@ int client_request_needs_reply( struct client * client,
|
||||
switch (request.type)
|
||||
{
|
||||
case REQUEST_READ:
|
||||
ERROR_IF( client->entrusted,
|
||||
"Received a read request "
|
||||
"after an entrust message.");
|
||||
break;
|
||||
case REQUEST_WRITE:
|
||||
ERROR_IF( client->entrusted,
|
||||
"Received a write request "
|
||||
"after an entrust message.");
|
||||
/* check it's not out of range */
|
||||
if ( request.from+request.len > client->serve->size) {
|
||||
warn("write request %d+%d out of range",
|
||||
@@ -377,11 +367,6 @@ int client_request_needs_reply( struct client * client,
|
||||
}
|
||||
break;
|
||||
|
||||
case REQUEST_ENTRUST:
|
||||
/* Yes, we need to reply to an entrust, but we take no
|
||||
* further action */
|
||||
debug("request entrust");
|
||||
break;
|
||||
case REQUEST_DISCONNECT:
|
||||
debug("request disconnect");
|
||||
client->disconnect = 1;
|
||||
@@ -394,19 +379,6 @@ int client_request_needs_reply( struct client * client,
|
||||
}
|
||||
|
||||
|
||||
void client_reply_to_entrust( struct client * client, struct nbd_request request )
|
||||
{
|
||||
/* An entrust needs a response, but has no data. */
|
||||
debug( "request entrust" );
|
||||
|
||||
client_write_reply( client, &request, 0 );
|
||||
/* We set this after trying to send the reply, so we know the
|
||||
* reply got away safely.
|
||||
*/
|
||||
client->entrusted = 1;
|
||||
}
|
||||
|
||||
|
||||
void client_reply_to_read( struct client* client, struct nbd_request request )
|
||||
{
|
||||
off64_t offset;
|
||||
@@ -478,9 +450,6 @@ void client_reply( struct client* client, struct nbd_request request )
|
||||
case REQUEST_WRITE:
|
||||
client_reply_to_write( client, request );
|
||||
break;
|
||||
case REQUEST_ENTRUST:
|
||||
client_reply_to_entrust( client, request );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -489,11 +458,11 @@ void client_reply( struct client* client, struct nbd_request request )
|
||||
int client_serve_request(struct client* client)
|
||||
{
|
||||
struct nbd_request request = {0};
|
||||
int failure = 1;
|
||||
int stop = 1;
|
||||
int disconnected = 0;
|
||||
|
||||
if ( !client_read_request( client, &request, &disconnected ) ) { return failure; }
|
||||
if ( disconnected ) { return failure; }
|
||||
if ( !client_read_request( client, &request, &disconnected ) ) { return stop; }
|
||||
if ( disconnected ) { return stop; }
|
||||
if ( !client_request_needs_reply( client, request ) ) {
|
||||
return client->disconnect;
|
||||
}
|
||||
@@ -502,7 +471,7 @@ int client_serve_request(struct client* client)
|
||||
{
|
||||
if ( !server_is_closed( client->serve ) ) {
|
||||
client_reply( client, request );
|
||||
failure = 0;
|
||||
stop = 0;
|
||||
}
|
||||
}
|
||||
server_unlock_io( client->serve );
|
||||
@@ -557,7 +526,6 @@ void* client_serve(void* client_uncast)
|
||||
debug("client: stopped serving requests");
|
||||
client->stopped = 1;
|
||||
|
||||
if ( client->entrusted ) {
|
||||
if ( client->disconnect ){
|
||||
debug("client: control arrived" );
|
||||
server_control_arrived( client->serve );
|
||||
@@ -565,7 +533,6 @@ void* client_serve(void* client_uncast)
|
||||
else {
|
||||
warn( "client: control transfer failed." );
|
||||
}
|
||||
}
|
||||
|
||||
FATAL_IF_NEGATIVE(
|
||||
close(client->socket),
|
||||
|
@@ -28,9 +28,6 @@ struct client {
|
||||
|
||||
struct server* serve; /* FIXME: remove above duplication */
|
||||
|
||||
/* Have we seen a REQUEST_ENTRUST message? */
|
||||
int entrusted;
|
||||
|
||||
/* Have we seen a REQUEST_DISCONNECT message? */
|
||||
int disconnect;
|
||||
};
|
||||
|
76
src/mirror.c
76
src/mirror.c
@@ -213,64 +213,10 @@ int mirror_pass(struct server * serve, int should_lock, uint64_t *written)
|
||||
}
|
||||
|
||||
|
||||
void mirror_give_control( struct mirror * mirror )
|
||||
{
|
||||
debug( "mirror: entrusting and disconnecting" );
|
||||
/* TODO: set up an error handler to clean up properly on ERROR.
|
||||
*/
|
||||
|
||||
/* A transfer of control is expressed as a 3-way handshake.
|
||||
* First, We send a REQUEST_ENTRUST. If this fails to be
|
||||
* received, this thread will simply block until the server is
|
||||
* restarted. If the remote end doesn't understand it, it'll
|
||||
* disconnect us, and an ERROR *should* bomb this thread.
|
||||
* FIXME: make the ERROR work.
|
||||
* If we get an explicit error back from the remote end, then
|
||||
* again, this thread will bomb out.
|
||||
* On receiving a valid response, we send a REQUEST_DISCONNECT,
|
||||
* and we quit without checking for a response. This is the
|
||||
* remote server's signal to assume control of the file. The
|
||||
* reason we don't check for a response is the state we end up
|
||||
* in if the final message goes astray: if we lose the
|
||||
* REQUEST_DISCONNECT, the sender has quit and the receiver
|
||||
* hasn't had a signal to take over yet, so the data is safe.
|
||||
* If we were to wait for a response to the REQUEST_DISCONNECT,
|
||||
* the sender and receiver would *both* be servicing write
|
||||
* requests while the response was in flight, and if the
|
||||
* response went astray we'd have two servers claiming
|
||||
* responsibility for the same data.
|
||||
*
|
||||
* The meaning of these is as follows:
|
||||
* The entrust signifies that all the data has been sent, and
|
||||
* the client is currently paused but not disconnected.
|
||||
* The disconnect signifies that the client has been
|
||||
* safely prevented from making any more writes.
|
||||
*
|
||||
* Since we lock io and close the server it in mirror_on_exit before
|
||||
* releasing, we don't actually need to take any action between the
|
||||
* two here.
|
||||
*/
|
||||
socket_nbd_entrust( mirror->client );
|
||||
socket_nbd_disconnect( mirror->client );
|
||||
}
|
||||
|
||||
|
||||
/* THIS FUNCTION MUST ONLY BE CALLED WITH THE SERVER'S IO LOCKED. */
|
||||
void mirror_on_exit( struct server * serve )
|
||||
{
|
||||
/* Send an explicit entrust and disconnect. After this
|
||||
* point we cannot allow any reads or writes to the local file.
|
||||
* We do this *before* trying to shut down the server so that if
|
||||
* the transfer of control fails, we haven't stopped the server
|
||||
* and already-connected clients don't get needlessly
|
||||
* disconnected.
|
||||
*/
|
||||
debug( "mirror_give_control");
|
||||
mirror_give_control( serve->mirror );
|
||||
|
||||
/* If we're still here, the transfer of control went ok, and the
|
||||
* remote is listening (or will be shortly). We can shut the
|
||||
* server down.
|
||||
/* If we're still here, we can shut the server down.
|
||||
*
|
||||
* It doesn't matter if we get new client connections before
|
||||
* now, the IO lock will stop them from doing anything.
|
||||
@@ -287,6 +233,14 @@ void mirror_on_exit( struct server * serve )
|
||||
*/
|
||||
debug("serve_wait_for_close");
|
||||
serve_wait_for_close( serve );
|
||||
|
||||
debug("Unlinking %s", serve->filename );
|
||||
if ( ACTION_UNLINK == serve->mirror->action_at_finish ) {
|
||||
server_unlink( serve->mirror );
|
||||
}
|
||||
|
||||
debug("Sending disconnect");
|
||||
socket_nbd_disconnect( serve->mirror->client );
|
||||
info("Mirror sent.");
|
||||
}
|
||||
|
||||
@@ -364,6 +318,16 @@ int mirror_connect( struct mirror * mirror, off64_t local_size )
|
||||
}
|
||||
|
||||
|
||||
int mirror_should_quit( struct mirror * mirror )
|
||||
{
|
||||
switch( mirror->action_at_finish ) {
|
||||
case ACTION_EXIT:
|
||||
case ACTION_UNLINK:
|
||||
return 1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void mirror_run( struct server *serve )
|
||||
{
|
||||
@@ -389,7 +353,7 @@ void mirror_run( struct server *serve )
|
||||
server_lock_io( serve );
|
||||
{
|
||||
if ( mirror_pass( serve, 0, &written ) &&
|
||||
ACTION_EXIT == serve->mirror->action_at_finish) {
|
||||
mirror_should_quit( serve->mirror ) ) {
|
||||
debug("exit!");
|
||||
mirror_on_exit( serve );
|
||||
info("Server closed, quitting "
|
||||
|
@@ -44,6 +44,7 @@ enum mirror_state;
|
||||
|
||||
enum mirror_finish_action {
|
||||
ACTION_EXIT,
|
||||
ACTION_UNLINK,
|
||||
ACTION_NOTHING
|
||||
};
|
||||
|
||||
|
@@ -10,7 +10,6 @@
|
||||
#define REQUEST_READ 0
|
||||
#define REQUEST_WRITE 1
|
||||
#define REQUEST_DISCONNECT 2
|
||||
#define REQUEST_ENTRUST (1<<16)
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <inttypes.h>
|
||||
|
@@ -152,18 +152,6 @@ void socket_nbd_write(int fd, off64_t from, int len, int in_fd, void* in_buf, in
|
||||
}
|
||||
|
||||
|
||||
void socket_nbd_entrust( int fd )
|
||||
{
|
||||
struct nbd_request request;
|
||||
struct nbd_reply reply;
|
||||
|
||||
fill_request( &request, REQUEST_ENTRUST, 0, 0 );
|
||||
FATAL_IF_NEGATIVE( writeloop( fd, &request, sizeof( request ) ),
|
||||
"Couldn't write request");
|
||||
read_reply( fd, &request, &reply );
|
||||
}
|
||||
|
||||
|
||||
int socket_nbd_disconnect( int fd )
|
||||
{
|
||||
int success = 1;
|
||||
|
@@ -9,7 +9,6 @@ int socket_connect(struct sockaddr* to, struct sockaddr* from);
|
||||
int socket_nbd_read_hello(int fd, off64_t * size);
|
||||
void socket_nbd_read(int fd, off64_t from, int len, int out_fd, void* out_buf, int timeout_secs);
|
||||
void socket_nbd_write(int fd, off64_t from, int len, int out_fd, void* out_buf, int timeout_secs);
|
||||
void socket_nbd_entrust(int fd);
|
||||
int socket_nbd_disconnect( int fd );
|
||||
|
||||
#endif
|
||||
|
17
src/serve.c
17
src/serve.c
@@ -115,6 +115,19 @@ void server_destroy( struct server * serve )
|
||||
}
|
||||
|
||||
|
||||
void server_unlink( struct server * serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
NULLCHECK( serve->filename );
|
||||
|
||||
FATAL_IF_NEGATIVE( unlink( serve->filename ),
|
||||
"Failed to unlink %s: %s",
|
||||
serve->filename,
|
||||
strerror( errno ) );
|
||||
|
||||
}
|
||||
|
||||
|
||||
void server_dirty(struct server *serve, off64_t from, int len)
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
@@ -694,15 +707,17 @@ void serve_wait_for_close( struct server * serve )
|
||||
}
|
||||
}
|
||||
|
||||
/* We've just had an ENTRUST/DISCONNECT pair, so we need to shut down
|
||||
/* We've just had an DISCONNECT pair, so we need to shut down
|
||||
* and signal our listener that we can safely take over.
|
||||
*/
|
||||
void server_control_arrived( struct server *serve )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
|
||||
if ( !serve->has_control ) {
|
||||
serve->has_control = 1;
|
||||
serve_signal_close( serve );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@@ -99,6 +99,7 @@ void server_unlock_acl( struct server *serve );
|
||||
int server_is_mirroring( struct server * serve );
|
||||
void server_abandon_mirror( struct server * serve );
|
||||
|
||||
void server_unlink( struct server * serve );
|
||||
|
||||
int do_serve( struct server * );
|
||||
|
||||
|
@@ -1,29 +0,0 @@
|
||||
#!/usr/bin/env ruby
|
||||
# encoding: utf-8
|
||||
|
||||
# Open a server, accept a client, then we expect a single write
|
||||
# followed by an entrust. Disconnect after the entrust. We expect a
|
||||
# reconnection followed by a full mirror.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
|
||||
addr, port, src_pid = *ARGV
|
||||
server = FakeDest.new( addr, port )
|
||||
client = server.accept
|
||||
|
||||
client.write_hello
|
||||
write_req = client.read_request
|
||||
data = client.read_data( write_req[:len] )
|
||||
client.write_reply( write_req[:handle], 0 )
|
||||
|
||||
entrust_req = client.read_request
|
||||
fail "Not an entrust" unless entrust_req[:type] == 65536
|
||||
client.close
|
||||
|
||||
client2 = server.accept
|
||||
client2.receive_mirror
|
||||
|
||||
|
||||
exit(0)
|
||||
|
@@ -3,7 +3,8 @@
|
||||
|
||||
# Open a server, accept a client, then we expect a single write
|
||||
# followed by an entrust. However, we disconnect after the write so
|
||||
# the entrust will fail. We expect a reconnection.
|
||||
# the entrust will fail. We don't expect a reconnection: the sender
|
||||
# can't reliably spot a failed send.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
@@ -21,7 +22,4 @@ client.write_reply( req[:handle], 0 )
|
||||
client.close
|
||||
Process.kill("CONT", src_pid.to_i)
|
||||
|
||||
client2 = server.accept
|
||||
client2.close
|
||||
|
||||
exit(0)
|
||||
|
@@ -1,34 +0,0 @@
|
||||
#!/usr/bin/env ruby
|
||||
# encoding: utf-8
|
||||
|
||||
# Receive a mirror, but respond to the entrust with an error. There's
|
||||
# currently no code path in flexnbd which can do this, but we could
|
||||
# add one.
|
||||
|
||||
require 'flexnbd/fake_dest'
|
||||
include FlexNBD
|
||||
|
||||
addr, port = *ARGV
|
||||
server = FakeDest.new( addr, port )
|
||||
client = server.accept
|
||||
|
||||
client.write_hello
|
||||
loop do
|
||||
req = client.read_request
|
||||
if req[:type] == 1
|
||||
client.read_data( req[:len] )
|
||||
client.write_reply( req[:handle] )
|
||||
else
|
||||
client.write_reply( req[:handle], 1 )
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
client.close
|
||||
|
||||
client2 = server.accept( "Timed out waiting for a reconnection" )
|
||||
|
||||
client2.close
|
||||
server.close
|
||||
|
||||
exit(0)
|
@@ -93,8 +93,6 @@ module FlexNBD
|
||||
write_write_request( 0, 8 )
|
||||
write_data( "12345678" )
|
||||
read_response()
|
||||
write_entrust_request()
|
||||
read_response()
|
||||
write_disconnect_request()
|
||||
close()
|
||||
end
|
||||
|
@@ -58,10 +58,6 @@ class TestDestErrorHandling < Test::Unit::TestCase
|
||||
run_fake( "source/close_after_write" )
|
||||
end
|
||||
|
||||
def test_disconnect_before_entrust_reply_causes_error
|
||||
run_fake( "source/close_after_entrust" )
|
||||
end
|
||||
|
||||
|
||||
def test_disconnect_before_write_reply_causes_error
|
||||
# Note that this is an odd case: writing the reply doesn't fail.
|
||||
@@ -71,14 +67,6 @@ class TestDestErrorHandling < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
|
||||
def test_disconnect_after_entrust_reply_causes_error
|
||||
@env.nbd1.can_die(0)
|
||||
# This fake runs a failed migration then a succeeding one, so we
|
||||
# expect the destination to take control.
|
||||
run_fake( "source/close_after_entrust_reply" )
|
||||
assert_control
|
||||
end
|
||||
|
||||
|
||||
def test_straight_migration
|
||||
@env.nbd1.can_die(0)
|
||||
|
@@ -72,13 +72,14 @@ class TestHappyPath < Test::Unit::TestCase
|
||||
@env.listen2
|
||||
|
||||
@env.nbd1.can_die
|
||||
@env.nbd2.can_die(0)
|
||||
stdout, stderr = @env.mirror12
|
||||
|
||||
@env.nbd1.join
|
||||
@env.nbd2.join
|
||||
|
||||
assert_equal(@env.file1.read_original( 0, @env.blocksize ),
|
||||
@env.file2.read( 0, @env.blocksize ) )
|
||||
assert @env.status2['has_control'], "destination didn't take control"
|
||||
end
|
||||
|
||||
|
||||
|
@@ -79,17 +79,6 @@ class TestSourceErrorHandling < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
|
||||
def test_post_entrust_disconnect_causes_retry
|
||||
@env.nbd1.can_die(0)
|
||||
run_fake( "dest/close_after_entrust" )
|
||||
end
|
||||
|
||||
|
||||
def test_entrust_error_causes_retry
|
||||
run_fake( "dest/error_on_entrust" )
|
||||
end
|
||||
|
||||
|
||||
def test_cancel_migration
|
||||
run_fake( "dest/break_after_hello" )
|
||||
end
|
||||
|
Reference in New Issue
Block a user